Python API Reference
Installation
pip install sciforma # package name on PyPI
import sci_form # import nameConformer Generation
embed
def embed(smiles: str, seed: int = 42) -> ConformerResultGenerate a 3D conformer for a single molecule via ETKDGv2.
from sci_form import embed
result = embed("CCO", seed=42)
print(result) # ConformerResult(smiles='CCO', atoms=9, time=8.3ms)
if result.is_ok():
positions = result.get_positions() # list of (x, y, z) tuples
print(f"O atom: {positions[2]}")embed_batch
def embed_batch(
smiles_list: list[str],
seed: int = 42,
num_threads: int = 0,
) -> list[ConformerResult]Batch-generate conformers. num_threads=0 auto-detects CPU count.
from sci_form import embed_batch
results = embed_batch(["CCO", "c1ccccc1", "CC(=O)O"], seed=42)
for r in results:
if r.is_ok():
print(f"{r.smiles}: {r.num_atoms} atoms, {r.time_ms:.1f}ms")parse
def parse(smiles: str) -> dictParse SMILES to a molecular graph without generating 3D coordinates.
from sci_form import parse
mol = parse("CCO")
print(mol["num_atoms"]) # 9
print(mol["atoms"][0]) # {'element': 8, 'hybridization': 'SP3', 'formal_charge': 0}Gasteiger Charges
charges
def charges(smiles: str) -> ChargeResultCompute Gasteiger-Marsili partial charges (6 iterations).
from sci_form import charges
result = charges("CCO")
print(result) # ChargeResult(n_atoms=9, total_charge=0.0000, iterations=6)
print(result.charges) # [-0.387, -0.042, -0.228, 0.123, ...]
print(result.total_charge) # ~0.0charges_configured
def charges_configured(
smiles: str,
max_iter: int = 6,
initial_damping: float = 1.0,
convergence_threshold: float = 1e-8,
) -> ChargeResultGasteiger-Marsili charges with full configuration control. Covers all main-group elements up to period 4 (Li, Be, Na, Mg, Al, Si, …) and handles formal charges on charged species.
from sci_form import charges_configured
# Charged species
result = charges_configured("[NH4+]", max_iter=12, convergence_threshold=1e-10)
print(f"Total charge: {result.total_charge:.2f}") # ~+1.0SASA
sasa
def sasa(
elements: list[int],
coords: list[float],
probe_radius: float = 1.4,
) -> SasaResultCompute solvent-accessible surface area (Shrake-Rupley).
from sci_form import embed, sasa
conf = embed("CCO")
result = sasa(conf.elements, conf.coords, probe_radius=1.4)
print(result) # SasaResult(total=82.31 Ų, n_atoms=9, probe=1.40 Å)
print(result.total_sasa) # Ų
print(result.atom_sasa) # per-atom listEHT — Extended Hückel Theory
eht_calculate
def eht_calculate(
elements: list[int],
coords: list[float],
k: float = 1.75,
) -> EhtResultRun an EHT semi-empirical calculation. k is the Wolfsberg-Helmholtz constant (default 1.75).
from sci_form import embed, eht_calculate
conf = embed("CCO")
eht = eht_calculate(conf.elements, conf.coords)
print(eht) # EhtResult(n_mo=9, gap=7.831 eV, HOMO=-12.453 eV, LUMO=-4.622 eV)
print(eht.homo_energy) # eV
print(eht.gap) # HOMO-LUMO gap in eV
print(eht.energies) # all orbital energies (eV)eht_orbital_mesh
def eht_orbital_mesh(
elements: list[int],
coords: list[float],
mo_index: int,
spacing: float = 0.2,
isovalue: float = 0.02,
) -> dictGenerate a 3D isosurface mesh for a molecular orbital at the given isovalue.
from sci_form import embed, eht_orbital_mesh
conf = embed("CCO")
eht = eht_calculate(conf.elements, conf.coords)
# HOMO mesh
homo_idx = eht.homo_index
mesh = eht_orbital_mesh(conf.elements, conf.coords, homo_idx, isovalue=0.02)
print(mesh.keys()) # dict_keys(['vertices', 'normals', 'indices', 'num_triangles', 'isovalue'])
# Use in Three.js / Open3D / matplotlib
vertices = mesh["vertices"] # flat [x,y,z, ...] array
triangles = mesh["indices"] # flat [i0,i1,i2, ...] triangle index arrayPopulation Analysis
population
def population(
elements: list[int],
coords: list[float],
) -> PopulationResultMulliken and Löwdin population analysis from EHT.
from sci_form import embed, population
conf = embed("CCO")
pop = population(conf.elements, conf.coords)
print(pop) # PopulationResult(n_atoms=9, total_mulliken=0.0000)
print(pop.mulliken_charges) # per-atom Mulliken charges
print(pop.lowdin_charges) # per-atom Löwdin charges
print(pop.total_charge_mulliken) # sum ≈ 0 for neutralDipole Moment
dipole
def dipole(
elements: list[int],
coords: list[float],
) -> DipoleResultCompute molecular dipole moment in Debye.
from sci_form import embed, dipole
conf = embed("CCO")
d = dipole(conf.elements, conf.coords)
print(d) # DipoleResult(1.847 Debye)
print(d.magnitude) # 1.847 (Debye)
print(d.vector) # [x, y, z] in Debye
print(d.unit) # "Debye"Density of States
dos
def dos(
elements: list[int],
coords: list[float],
sigma: float = 0.3,
e_min: float = -30.0,
e_max: float = 5.0,
n_points: int = 500,
) -> DosResultCompute total DOS from EHT orbital energies with Gaussian smearing
from sci_form import embed, dos
import matplotlib.pyplot as plt
conf = embed("c1ccccc1")
result = dos(conf.elements, conf.coords, sigma=0.2, e_min=-25.0, e_max=5.0, n_points=300)
plt.plot(result.energies, result.total_dos)
plt.xlabel("Energy (eV)")
plt.ylabel("DOS (states/eV)")
plt.show()Molecular Alignment
rmsd
def rmsd(
coords: list[float],
reference: list[float],
) -> AlignmentResultKabsch optimal alignment and RMSD computation.
from sci_form import embed, rmsd
conf1 = embed("CCO", seed=42)
conf2 = embed("CCO", seed=123)
result = rmsd(conf1.coords, conf2.coords)
print(result) # AlignmentResult(rmsd=0.0342 Å)
print(result.aligned_coords) # conf1 coords after optimal rotation onto conf2Force Field Energy
uff_energy
def uff_energy(smiles: str, coords: list[float]) -> floatEvaluate UFF force field energy in kcal/mol.
from sci_form import embed, uff_energy
conf = embed("CCO")
e = uff_energy("CCO", conf.coords)
print(f"UFF energy: {e:.3f} kcal/mol")Materials
unit_cell
def unit_cell(
a: float, b: float, c: float,
alpha: float = 90.0,
beta: float = 90.0,
gamma: float = 90.0,
) -> UnitCellCreate a periodic unit cell from crystallographic parameters (a, b, c in Å; angles in degrees).
from sci_form import unit_cell
cell = unit_cell(10.0, 10.0, 10.0) # cubic
cell = unit_cell(5.0, 5.0, 12.0, 90, 90, 120) # hexagonal
print(cell.volume) # ų
print(cell.lattice) # 3×3 matrixassemble_framework
def assemble_framework(
topology: str = "pcu",
metal: int = 30,
geometry: str = "octahedral",
lattice_a: float = 10.0,
supercell: int = 1,
) -> CrystalStructureAssemble a MOF-type framework crystal structure. topology ∈ {"pcu", "dia", "sql"}. geometry ∈ {"linear", "trigonal", "tetrahedral", "square_planar", "octahedral"}.
from sci_form import assemble_framework
mof = assemble_framework(topology="pcu", metal=30, geometry="octahedral", lattice_a=26.3)
print(mof.num_atoms) # total atoms
print(mof.elements) # atomic numbers
print(mof.cart_coords) # Cartesian coordinates
print(mof.frac_coords) # fractional coordinatesSpectroscopy (Track D)
stda_uvvis
def stda_uvvis(
elements: list[int],
coords: list[float],
sigma: float = 0.3,
e_min: float = 1.0,
e_max: float = 8.0,
n_points: int = 500,
broadening: str = "gaussian", # "gaussian" or "lorentzian"
) -> StdaUvVisSpectrumPyCompute UV-Vis spectrum via sTDA on EHT MO transitions. Returns a StdaUvVisSpectrumPy with .wavelengths_nm, .intensities, .excitations (list of StdaExcitationPy), .gap, .homo_energy, .lumo_energy, .n_transitions, .notes.
from sci_form import embed, stda_uvvis
conf = embed("c1ccccc1", seed=42)
spec = stda_uvvis(conf.elements, conf.coords, sigma=0.3, e_min=1.0, e_max=8.0)
print(f"Gap: {spec.gap:.2f} eV")
max_i = max(spec.intensities)
lam_max = spec.wavelengths_nm[spec.intensities.index(max_i)]
print(f"λ_max ≈ {lam_max:.1f} nm")
for ex in sorted(spec.excitations, key=lambda e: -e.oscillator_strength)[:3]:
print(f" {ex.wavelength_nm:.1f} nm f={ex.oscillator_strength:.4f} MO{ex.from_mo}→MO{ex.to_mo}")vibrational_analysis
def vibrational_analysis(
elements: list[int],
coords: list[float],
method: str = "eht", # "eht", "pm3", or "xtb"
) -> VibrationalAnalysisPyNumerical Hessian (VibrationalAnalysisPy with .n_atoms, .modes (list of VibrationalModePy), .n_real_modes, .zpve_ev, .method, .notes, .thermochemistry.
Each VibrationalModePy has .frequency_cm1, .ir_intensity (km/mol), .displacement (list, length 3N), .is_real, .label (functional group annotation).
ThermochemistryPy (RRHO, 298.15 K): .zpve_kcal, .thermal_energy_kcal, .entropy_vib_cal, .gibbs_correction_kcal.
from sci_form import embed, vibrational_analysis
conf = embed("CCO", seed=42)
vib = vibrational_analysis(conf.elements, conf.coords, method="xtb")
print(f"ZPVE: {vib.zpve_ev:.4f} eV, {vib.n_real_modes} real modes")
print(f"Gibbs correction: {vib.thermochemistry.gibbs_correction_kcal:.3f} kcal/mol")
real = [m for m in vib.modes if m.is_real]
strongest = max(real, key=lambda m: m.ir_intensity)
print(f"Strongest IR: {strongest.frequency_cm1:.1f} cm⁻¹ "
f"({strongest.ir_intensity:.1f} km/mol) [{strongest.label or '?'}]")ir_spectrum
def ir_spectrum(
elements: list[int],
coords: list[float],
method: str = "xtb",
) -> IrSpectrumPyConvenience wrapper: runs vibrational_analysis and applies default Lorentzian broadening. Returns IrSpectrumPy with .wavenumbers, .intensities, .transmittance, .peaks (list of IrPeakPy), .gamma.
from sci_form import embed, ir_spectrum
conf = embed("CCO", seed=42)
spec = ir_spectrum(conf.elements, conf.coords, method="xtb")
import matplotlib.pyplot as plt
plt.plot(spec.wavenumbers, spec.intensities)
plt.xlabel("Wavenumber (cm⁻¹)")
plt.gca().invert_xaxis()
plt.show()ir_spectrum_broadened
def ir_spectrum_broadened(
analysis: VibrationalAnalysisPy,
gamma: float = 15.0,
wn_min: float = 600.0,
wn_max: float = 4000.0,
n_points: int = 1000,
broadening: str = "lorentzian", # "lorentzian" or "gaussian"
) -> IrSpectrumPyBroadened IR spectrum from an existing VibrationalAnalysisPy with full parameter control. Includes both absorbance (.intensities) and transmittance (.transmittance) axes.
from sci_form import ir_spectrum_broadened
# Gaussian broadening for publication-quality output
spec = ir_spectrum_broadened(vib, gamma=10.0, wn_min=400.0, wn_max=4000.0,
n_points=2000, broadening="gaussian")
print(f"{len(spec.peaks)} labelled peaks")nmr_shifts
def nmr_shifts(smiles: str) -> NmrShiftResultPyPredict representative shifts for the default nucleus of each element in the molecule. Returns NmrShiftResultPy with legacy fields such as .h_shifts, .c_shifts, and .other_shifts for the expanded registry. No 3D geometry needed.
Each ChemicalShiftPy has .atom_index, .element, .shift_ppm, .environment, .confidence.
from sci_form import nmr_shifts
shifts = nmr_shifts("CCO")
for h in shifts.h_shifts:
print(f"H#{h.atom_index}: {h.shift_ppm:.2f} ppm [{h.environment}] conf={h.confidence:.2f}")
for c in shifts.c_shifts:
print(f"C#{c.atom_index}: {c.shift_ppm:.1f} ppm [{c.environment}]")nmr_shifts_for_nucleus
def nmr_shifts_for_nucleus(smiles: str, nucleus: str) -> list[ChemicalShiftPy]Predict shifts for one requested nucleus, including the expanded fast-inference nuclei such as 2H, 35Cl, 79Br, 195Pt, and 207Pb.
from sci_form import nmr_shifts_for_nucleus
pt = nmr_shifts_for_nucleus("[Pt]", "195Pt")
for peak in pt:
print(f"Pt#{peak.atom_index}: {peak.shift_ppm:.1f} ppm [{peak.environment}]")giao_nmr
def giao_nmr(
elements: list[int],
coords: list[float],
nucleus: str = "1H",
charge: int = 0,
multiplicity: int = 1,
max_scf_iter: int = 100,
allow_basis_fallback: bool = False,
use_parallel_eri: bool = False,
) -> GiaoNmrResultPyRun the public SCF-backed GIAO route for one requested nucleus. Returns GiaoNmrResultPy with .chemical_shifts, .shieldings, .scf_converged, .scf_iterations, .fallback_elements, and .notes.
The current public path is restricted to singlet closed-shell systems. By default it rejects elements that only have the hydrogen-like fallback basis in the SCF stack.
from sci_form import giao_nmr
water = [8, 1, 1]
coords = [
0.0, 0.0, 0.1173,
0.0, 0.7572, -0.4692,
0.0, -0.7572, -0.4692,
]
result = giao_nmr(water, coords, nucleus="1H")
print(result.chemical_shifts)
print(result.notes[0])nmr_couplings
def nmr_couplings(
smiles: str,
coords: list[float] = [], # flat [x0,y0,z0,...]; empty → free-rotation average
) -> list[JCouplingPy]Predict H–H J-coupling constants. With 3D coords uses the Karplus equation for ³J; topology only gives free-rotation averages (~5.3 Hz). Parameterized pathways: H-C-C-H (Altona-Sundaralingam), H-C-N-H (Bystrov), H-C-O-H, H-C-S-H.
Each JCouplingPy has .h1_index, .h2_index, .j_hz, .n_bonds, .coupling_type.
from sci_form import embed, nmr_couplings
conf = embed("CC", seed=42)
couplings = nmr_couplings("CC", conf.coords)
for j in couplings:
print(f"{j.n_bonds}J(H{j.h1_index},H{j.h2_index}) = {j.j_hz:.2f} Hz [{j.coupling_type}]")ensemble_j_couplings
def ensemble_j_couplings(
smiles: str,
conformer_coords: list[list[float]],
energies_kcal: list[float],
temperature_k: float = 298.15,
) -> list[JCouplingPy]Boltzmann-average ³J couplings over a conformer ensemble.
from sci_form import embed_batch, ensemble_j_couplings
results = embed_batch(["CCCC"] * 20, seed=42)
coords = [r.coords for r in results if r.is_ok()]
energies = [0.0] * len(coords) # or compute from uff_energy
couplings = ensemble_j_couplings("CCCC", coords, energies, temperature_k=298.15)
for jc in couplings:
print(f"H{jc.h1_index}–H{jc.h2_index}: {jc.j_hz:.2f} Hz")nmr_spectrum
def nmr_spectrum(
smiles: str,
nucleus: str = "1H", # e.g. "1H", "13C", "35Cl", "79Br", "195Pt"
gamma: float = 0.01,
ppm_min: float = -2.0,
ppm_max: float = 14.0,
n_points: int = 2000,
) -> NmrSpectrumPyFull NMR spectrum pipeline. Returns NmrSpectrumPy with .ppm_axis, .intensities, .peaks (list of NmrPeakPy), .nucleus, .gamma.
The ¹H path still models vicinal splitting explicitly. Other nuclei are rendered as fast relative spectra with nucleus-specific linewidth defaults.
from sci_form import nmr_spectrum
spec = nmr_spectrum("CCO", nucleus="1H")
import matplotlib.pyplot as plt
plt.plot(spec.ppm_axis, spec.intensities)
plt.xlabel("δ (ppm)")
plt.gca().invert_xaxis()
plt.show()hose_codes
def hose_codes(smiles: str, max_radius: int = 4) -> list[HoseCodePy]Generate HOSE code fingerprints for all atoms. Each HoseCodePy has .atom_index, .element, .code_string, .radius.
Semi-Empirical QM
pm3_calculate
def pm3_calculate(
elements: list[int], # atomic numbers
coords: list[float], # flat [x0,y0,z0,...]
) -> Pm3ResultPM3 NDDO semi-empirical SCF. Supports transition metals via PM3(tm): Ti,Cr,Mn,Fe,Co,Ni,Cu,Zn,Al,Si,Ge.
Returns Pm3Result with .orbital_energies, .electronic_energy, .nuclear_repulsion, .total_energy, .heat_of_formation, .n_basis, .n_electrons, .homo_energy, .lumo_energy, .gap, .mulliken_charges, .scf_iterations, .converged.
from sci_form import embed, pm3_calculate
conf = embed("CCO", seed=42)
pm3 = pm3_calculate(conf.elements, conf.coords)
print(f"HOF: {pm3.heat_of_formation:.2f} kcal/mol, gap: {pm3.gap:.3f} eV")xtb_calculate
def xtb_calculate(
elements: list[int],
coords: list[float],
) -> XtbResultGFN0-xTB tight-binding with self-consistent charges. Supports 25 elements including transition metals (Ti,Cr,Mn,Fe,Co,Ni,Cu,Zn,Ru,Pd,Ag,Pt,Au).
Returns XtbResult with .orbital_energies, .electronic_energy, .repulsive_energy, .total_energy, .n_basis, .n_electrons, .homo_energy, .lumo_energy, .gap, .mulliken_charges, .scc_iterations, .converged.
from sci_form import embed, xtb_calculate
conf = embed("CCO", seed=42)
xtb = xtb_calculate(conf.elements, conf.coords)
print(f"xTB gap: {xtb.gap:.3f} eV, converged: {xtb.converged}")gfn1_calculate
def gfn1_calculate(
elements: list[int],
coords: list[float],
) -> Gfn1ResultPyGFN1-xTB tight-binding with shell-resolved charges and D3-style dispersion.
Returns Gfn1ResultPy with .orbital_energies, .electronic_energy, .repulsive_energy, .dispersion_energy, .total_energy, .n_basis, .n_electrons, .homo_energy, .lumo_energy, .gap, .mulliken_charges, .shell_charges, .scc_iterations, .converged.
from sci_form import embed, gfn1_calculate
conf = embed("CCO", seed=42)
gfn1 = gfn1_calculate(conf.elements, conf.coords)
print(f"GFN1 total energy: {gfn1.total_energy:.3f} eV")gfn2_calculate
def gfn2_calculate(
elements: list[int],
coords: list[float],
) -> Gfn2ResultPyGFN2-xTB tight-binding with multipole electrostatics, D4-style dispersion, and halogen-bond correction terms.
Returns Gfn2ResultPy with .orbital_energies, .electronic_energy, .repulsive_energy, .dispersion_energy, .halogen_bond_energy, .total_energy, .n_basis, .n_electrons, .homo_energy, .lumo_energy, .gap, .mulliken_charges, .atomic_dipoles, .atomic_quadrupoles, .scc_iterations, .converged.
from sci_form import embed, gfn2_calculate
conf = embed("CCO", seed=42)
gfn2 = gfn2_calculate(conf.elements, conf.coords)
print(f"GFN2 total energy: {gfn2.total_energy:.3f} eV")Ab-initio / Neural Potentials
hf3c_calculate
def hf3c_calculate(
elements: list[int],
coords: list[float],
max_scf_iter: int = 300,
n_cis_states: int = 5,
corrections: bool = True,
) -> Hf3cResultPyMinimal-basis Hartree-Fock with D3, gCP, and SRB corrections.
ani_calculate
def ani_calculate(
elements: list[int],
coords: list[float],
) -> AniResultPyANI-2x neural network potential for H, C, N, O, F, S, Cl.
Bond Orders & Frontier Analysis
bond_orders
def bond_orders(
elements: list[int],
coords: list[float],
) -> BondOrderResultPyWiberg and Mayer bond orders from EHT density matrix. Returns .atom_pairs, .distances, .wiberg, .mayer, .wiberg_valence, .mayer_valence.
frontier_descriptors
def frontier_descriptors(
elements: list[int],
coords: list[float],
) -> FrontierDescriptorsPyHOMO/LUMO atom contributions and dual descriptor. Returns .homo_atom_contributions, .lumo_atom_contributions, .dual_descriptor, .homo_energy, .lumo_energy, .gap.
Reactivity
fukui_descriptors
def fukui_descriptors(
elements: list[int],
coords: list[float],
) -> FukuiDescriptorsPyCondensed Fukui functions f⁺, f⁻, f⁰. Returns .condensed_f_plus, .condensed_f_minus, .condensed_f_radical, .condensed_dual_descriptor, .gap, .validity_notes.
reactivity_ranking
def reactivity_ranking(
elements: list[int],
coords: list[float],
) -> ReactivityRankingPyRanked atomic sites for nucleophilic, electrophilic, and radical attack.
empirical_pka
def empirical_pka(smiles: str) -> EmpiricalPkaResultPyGraph-heuristic pKa estimation. Returns .acidic_sites, .basic_sites.
ML Properties
ml_descriptors
def ml_descriptors(smiles: str) -> MolecularDescriptorsTopology-derived descriptors. No 3D required. Returns .molecular_weight, .n_heavy_atoms, .n_hbd, .n_hba, .fsp3, .wiener_index, etc.
ml_predict
def ml_predict(smiles: str) -> MlPropertyResultLipinski/ADME prediction. Returns .logp, .molar_refractivity, .log_solubility, .druglikeness, .lipinski_violations, .lipinski_passes.
Stereochemistry
stereo_analysis
def stereo_analysis(
smiles: str,
coords: list[float] = [], # flat [x0,y0,z0,...]; empty → topology-only
) -> StereoAnalysisPyAssign CIP priorities and determine R/S at stereocenters and E/Z at double bonds.
class StereoAnalysisPy:
stereocenters: list[StereocenterPy]
double_bonds: list[DoubleBondStereoPy]
n_stereocenters: int
n_double_bonds: int
class StereocenterPy:
atom_index: int
element: int
substituent_indices: list[int]
priorities: list[int] # CIP rank (1 = highest)
configuration: str | None # "R" or "S"
class DoubleBondStereoPy:
atom1: int
atom2: int
configuration: str | None # "E" or "Z"
high_priority_sub1: int | None
high_priority_sub2: int | NoneExample:
from sci_form import embed, stereo_analysis
# Chiral center
conf = embed("C(F)(Cl)(Br)I", seed=42)
stereo = stereo_analysis("C(F)(Cl)(Br)I", conf.coords)
print(f"{stereo.n_stereocenters} stereocenters")
for sc in stereo.stereocenters:
print(f" Atom {sc.atom_index}: {sc.configuration}")
# Double bond geometry
stereo2 = stereo_analysis("C/C=C/C") # trans-2-butene
print(f"E/Z: {stereo2.double_bonds[0].configuration}")Implicit Solvation
nonpolar_solvation
def nonpolar_solvation(
elements: list[int],
coords: list[float],
probe_radius: float = 1.4,
) -> NonPolarSolvationPyNon-polar solvation
class NonPolarSolvationPy:
energy_kcal_mol: float
atom_contributions: list[float]
atom_sasa: list[float]
total_sasa: floatgb_solvation
def gb_solvation(
elements: list[int],
coords: list[float],
charges: list[float],
solvent_dielectric: float = 78.5,
solute_dielectric: float = 1.0,
probe_radius: float = 1.4,
) -> GbSolvationPyGB/SA electrostatic solvation (HCT Born radii + Still GB equation).
class GbSolvationPy:
electrostatic_energy_kcal_mol: float
nonpolar_energy_kcal_mol: float
total_energy_kcal_mol: float
born_radii: list[float]
charges: list[float]
solvent_dielectric: float
solute_dielectric: floatExample:
from sci_form import embed, charges as gasteiger_charges, nonpolar_solvation, gb_solvation
conf = embed("CCO", seed=42)
q = gasteiger_charges("CCO").charges
np_solv = nonpolar_solvation(conf.elements, conf.coords)
print(f"Non-polar ΔG: {np_solv.energy_kcal_mol:.2f} kcal/mol")
gb = gb_solvation(conf.elements, conf.coords, q)
print(f"Total solvation: {gb.total_energy_kcal_mol:.2f} kcal/mol")Ring Perception
sssr
def sssr(smiles: str) -> SssrResultPySmallest Set of Smallest Rings (Horton's algorithm) with Hückel aromaticity.
class SssrResultPy:
rings: list[RingInfoPy]
atom_ring_count: list[int] # rings containing each atom
atom_ring_sizes: list[list[int]] # ring sizes per atom
ring_size_histogram: dict[int, int] # size → count
class RingInfoPy:
atoms: list[int] # atom indices
size: int
is_aromatic: boolExample:
from sci_form import sssr
r = sssr("c1ccc2ccccc2c1") # naphthalene
print(f"{r.rings[0].size}-membered ring, aromatic={r.rings[0].is_aromatic}")
print(f"Histogram: {r.ring_size_histogram}") # {6: 2}Fingerprints (ECFP)
ecfp
def ecfp(
smiles: str,
radius: int = 2, # ECFP4 = radius 2
n_bits: int = 2048,
) -> ECFPFingerprintPyExtended-Connectivity Fingerprint (Morgan algorithm). Atom invariants: element, degree, valence, ring membership, aromaticity, formal charge.
class ECFPFingerprintPy:
n_bits: int
on_bits: list[int] # set bit indices
radius: int
density: float # fraction of set bitstanimoto
def tanimoto(
fp1: ECFPFingerprintPy,
fp2: ECFPFingerprintPy,
) -> floatJaccard-Tanimoto similarity
Example:
from sci_form import ecfp, tanimoto
fp1 = ecfp("c1ccccc1", radius=2, n_bits=2048)
fp2 = ecfp("Cc1ccccc1", radius=2, n_bits=2048)
print(f"Tanimoto: {tanimoto(fp1, fp2):.3f}") # ~0.5–0.7
print(f"Self-sim: {tanimoto(fp1, fp1):.3f}") # 1.0Conformer Clustering
butina_cluster
def butina_cluster(
conformers: list[list[float]], # list of flat coord arrays
rmsd_cutoff: float = 1.0,
) -> ClusterResultPyTaylor-Butina single-linkage RMSD clustering.
class ClusterResultPy:
n_clusters: int
assignments: list[int] # cluster index per conformer
centroid_indices: list[int] # representative conformer per cluster
cluster_sizes: list[int]
rmsd_cutoff: floatrmsd_matrix
def rmsd_matrix(conformers: list[list[float]]) -> list[list[float]]O(N²) pairwise RMSD matrix with Kabsch alignment.
filter_diverse
def filter_diverse(
conformers: list[list[float]],
rmsd_cutoff: float = 1.0,
) -> list[int]Return indices of cluster centroid (diverse representative) conformers.
Example:
from sci_form import embed_batch, butina_cluster, filter_diverse
results = embed_batch(["CCCC"] * 20, seed=42)
coords = [r.coords for r in results if r.is_ok()]
clusters = butina_cluster(coords, rmsd_cutoff=0.5)
print(f"{clusters.n_clusters} clusters, sizes: {clusters.cluster_sizes}")
diverse_idx = filter_diverse(coords, rmsd_cutoff=1.0)
print(f"Diverse set: {len(diverse_idx)} conformers")Transport / Batch Streaming
pack_conformers
def pack_conformers(results: list[ConformerResult]) -> RecordBatchPack conformer results into Arrow-compatible columnar format for efficient transfer.
from sci_form import embed_batch, pack_conformers
results = embed_batch(["CCO", "c1ccccc1"])
batch = pack_conformers(results)
print(batch.num_rows) # 2
print(batch.column_names) # ['smiles', 'num_atoms', 'coords', ...]
print(batch.float_data) # {'coords': [...], 'time_ms': [...]}split_worker_tasks
def split_worker_tasks(
smiles: list[str],
n_workers: int = 4,
seed: int = 42,
) -> list[dict]Split a SMILES list into balanced worker tasks for multi-process/web-worker dispatch.
estimate_workers
def estimate_workers(n_items: int, max_workers: int = 8) -> intEstimate the optimal number of workers for a given batch size.
Return Types
ConformerResult
| Attribute | Type | Description |
|---|---|---|
smiles | str | Input SMILES |
num_atoms | int | Atom count |
coords | list[float] | Flat coords [x₀,y₀,z₀,…] |
elements | list[int] | Atomic numbers |
bonds | list[tuple] | (a, b, order_str) |
error | str | None | Error message or None |
time_ms | float | Generation time ms |
Methods: get_positions() → list[tuple[float,float,float]], is_ok() → bool
EhtResult
| Attribute | Type | Description |
|---|---|---|
energies | list[float] | All MO energies (eV) |
n_electrons | int | Total electron count |
homo_index | int | Index of HOMO orbital |
lumo_index | int | Index of LUMO orbital |
homo_energy | float | HOMO energy (eV) |
lumo_energy | float | LUMO energy (eV) |
gap | float | HOMO-LUMO gap (eV) |
ChargeResult
| Attribute | Type | Description |
|---|---|---|
charges | list[float] | Per-atom partial charges |
iterations | int | Convergence iterations |
total_charge | float | Sum of all charges |
SasaResult
| Attribute | Type | Description |
|---|---|---|
total_sasa | float | Total SASA in Ų |
atom_sasa | list[float] | Per-atom SASA in Ų |
probe_radius | float | Probe radius used (Å) |
num_points | int | Fibonacci sphere points per atom |
PopulationResult
| Attribute | Type | Description |
|---|---|---|
mulliken_charges | list[float] | Per-atom Mulliken charges |
lowdin_charges | list[float] | Per-atom Löwdin charges |
num_atoms | int | Atom count |
total_charge_mulliken | float | Total Mulliken charge |
DipoleResult
| Attribute | Type | Description |
|---|---|---|
vector | list[float] | Dipole vector [x, y, z] in Debye |
magnitude | float | Dipole magnitude in Debye |
unit | str | Always "Debye" |
DosResult
| Attribute | Type | Description |
|---|---|---|
energies | list[float] | Energy axis (eV) |
total_dos | list[float] | Total DOS curve |
sigma | float | Smearing width used (eV) |
AlignmentResult
| Attribute | Type | Description |
|---|---|---|
rmsd | float | RMSD after Kabsch alignment (Å) |
aligned_coords | list[float] | Transformed coordinates |
Integration Examples
NumPy Array
import numpy as np
from sci_form import embed
result = embed("CCO")
coords = np.array(result.coords).reshape(-1, 3) # (9, 3)
elements = np.array(result.elements) # (9,)Pandas DataFrame
import pandas as pd
from sci_form import embed_batch, charges
smiles = ["CCO", "c1ccccc1", "CC(=O)O"]
results = embed_batch(smiles)
df = pd.DataFrame([
{
"smiles": r.smiles,
"num_atoms": r.num_atoms,
"time_ms": r.time_ms,
"ok": r.is_ok(),
}
for r in results
])RDKit Interop
from rdkit import Chem
from rdkit.Chem import AllChem
from sci_form import embed
result = embed("c1ccccc1")
mol = Chem.MolFromSmiles("c1ccccc1")
mol = Chem.AddHs(mol)
conf = Chem.Conformer(result.num_atoms)
for i in range(result.num_atoms):
conf.SetAtomPosition(i, (
result.coords[i*3],
result.coords[i*3+1],
result.coords[i*3+2],
))
mol.AddConformer(conf, assignId=True)DOS + matplotlib
import matplotlib.pyplot as plt
from sci_form import embed, dos
conf = embed("c1ccccc1") # benzene
result = dos(conf.elements, conf.coords, sigma=0.3)
plt.figure(figsize=(8, 4))
plt.plot(result.energies, result.total_dos, 'b-', lw=1.5)
plt.axvline(x=0, color='k', ls='--', label='Fermi level (HOMO)')
plt.xlabel("Energy (eV)")
plt.ylabel("DOS (states/eV)")
plt.title("Benzene DOS")
plt.legend()
plt.tight_layout()
plt.show()