Skip to content

Commit 303b981

Browse files
committed
fix edge cases in atomselect and bondguesser with empty molecules or single atoms
1 parent b944247 commit 303b981

File tree

5 files changed

+133
-0
lines changed

5 files changed

+133
-0
lines changed

moleculekit/atomselect/analyze.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,22 @@ def analyze(mol: Molecule, bonds, _profile=False):
7070
from moleculekit.periodictable import periodictable
7171
import numpy as np
7272

73+
if mol.numAtoms == 0:
74+
empty_bool = np.zeros(0, dtype=bool)
75+
empty_uint = np.zeros(0, dtype=np.uint32)
76+
return {
77+
"waters": empty_bool.copy(),
78+
"lipids": empty_bool.copy(),
79+
"ions": empty_bool.copy(),
80+
"residues": empty_uint.copy(),
81+
"protein_bb": empty_bool.copy(),
82+
"nucleic_bb": empty_bool.copy(),
83+
"protein": empty_bool.copy(),
84+
"nucleic": empty_bool.copy(),
85+
"fragments": empty_uint.copy(),
86+
"sidechain": empty_bool.copy(),
87+
}
88+
7389
insertion = np.unique(mol.insertion, return_inverse=True)[1].astype(np.uint32)
7490
chain_id = np.unique(mol.chain, return_inverse=True)[1].astype(np.uint32)
7591
seg_id = np.unique(mol.segid, return_inverse=True)[1].astype(np.uint32)

moleculekit/atomselect/atomselect.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,12 @@ def fn(x, y):
268268

269269

270270
def atomselect(mol, selection, bonds, _debug=False, _analysis=None, _return_ast=False):
271+
if mol.numAtoms == 0:
272+
mask = np.zeros(0, dtype=bool)
273+
if _return_ast:
274+
return mask, None
275+
return mask
276+
271277
if _analysis is None:
272278
_analysis = analyze(mol, bonds)
273279

moleculekit/molecule.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,6 +1125,9 @@ def _guessBonds(self, rdkit=False):
11251125
"""
11261126
from moleculekit.bondguesser import guess_bonds, guess_bonds_rdkit
11271127

1128+
if self.numAtoms == 0:
1129+
return np.empty((0, 2), dtype=Molecule._dtypes["bonds"])
1130+
11281131
if self.numFrames == 0:
11291132
raise RuntimeError(
11301133
"No coordinates found in the molecule. Cannot guess bonds."

tests/test_atomselect.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,89 @@ def _test_atomselect(pdbid, sel, _pdbmols):
183183
pickle.dump(results, f)
184184

185185

186+
def _test_empty_molecule():
    """Check that every selection on a freshly constructed ``Molecule()`` yields an empty mask.

    Each selection string is passed to ``mol.atomselect`` and the result must
    have shape ``(0,)`` and a boolean dtype.
    """
    mol = Molecule()

    # Selections are grouped by flavour only for readability; they are
    # evaluated in exactly the order they are listed here.
    keyword_sels = (
        "all",
        "protein",
        "nucleic",
        "water",
        "lipid",
        "ion",
        "backbone",
        "sidechain",
        "hydrogen",
        "noh",
    )
    field_sels = (
        "name CA",
        "resname ALA",
        "resid 1",
        "chain A",
        "index 0",
        "serial 1",
        "element C",
    )
    comparison_sels = ("mass < 5", "x < 6", "beta >= 0")
    negated_sels = ("not protein",)

    for sel in (*keyword_sels, *field_sels, *comparison_sels, *negated_sels):
        res = mol.atomselect(sel)
        assert res.shape == (0,), f"Expected empty result for '{sel}', got shape {res.shape}"
        assert res.dtype == bool, f"Expected bool dtype for '{sel}', got {res.dtype}"
215+
216+
217+
def _test_single_atom_molecule():
    """Check atom selections on a molecule holding exactly one atom.

    The molecule is created with ``mol.empty(1)`` and filled in as an ATOM
    record named CA in residue ALA 1 of chain A, element C, with all-zero
    coordinates for a single frame. Every selection must return a boolean
    mask of shape ``(1,)``; the first group of selections must select the
    atom and the second group must not.
    """
    mol = Molecule()
    mol.empty(1)
    atom_fields = {
        "record": "ATOM",
        "name": "CA",
        "resname": "ALA",
        "resid": 1,
        "chain": "A",
        "element": "C",
    }
    for field, value in atom_fields.items():
        # In-place slice assignment, same as ``mol.<field>[:] = value``.
        getattr(mol, field)[:] = value
    mol.coords = np.zeros((1, 3, 1), dtype=np.float32)

    selecting = (
        "all",
        "name CA",
        "resname ALA",
        "resid 1",
        "chain A",
        "element C",
        "index 0",
        "serial 1",
        "noh",
        "not nucleic",
        "not water",
        "x < 6",
        "beta >= 0",
    )
    non_selecting = (
        "nucleic",
        "water",
        "lipid",
        "ion",
        "hydrogen",
        "name CB",
        "resname GLY",
        "resid 2",
        "chain B",
        "element N",
        "index 1",
    )

    # All selections expected to match are checked first, then the ones
    # expected not to match, each in listed order.
    cases = [(sel, True) for sel in selecting]
    cases += [(sel, False) for sel in non_selecting]

    for sel, should_select in cases:
        res = mol.atomselect(sel)
        assert res.shape == (1,), f"Expected shape (1,) for '{sel}', got {res.shape}"
        assert res.dtype == bool
        if should_select:
            assert res[0], f"Expected True for '{sel}'"
        else:
            assert not res[0], f"Expected False for '{sel}'"
267+
268+
186269
def _test_numprop_list_equality():
187270
pdb = os.path.join(curr_dir, "test_atomselect", "test.pdb")
188271
mol = Molecule(pdb)

tests/test_bondguesser.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,31 @@ def _test_bond_guessing(pdbid):
4444
assert np.array_equal(bonds, bondsref)
4545

4646

47+
def _test_zero_atoms():
    """Check that ``guess_bonds`` on a freshly constructed ``Molecule()`` returns a ``(0, 2)`` uint32 array."""
    from moleculekit.molecule import Molecule
    from moleculekit.bondguesser import guess_bonds

    guessed = guess_bonds(Molecule())

    assert guessed.shape == (0, 2)
    assert guessed.dtype == np.uint32
55+
56+
57+
def _test_single_atom():
58+
from moleculekit.molecule import Molecule
59+
from moleculekit.bondguesser import guess_bonds
60+
61+
mol = Molecule()
62+
mol.empty(1)
63+
mol.element[:] = "C"
64+
mol.name[:] = "CA"
65+
mol.coords = np.zeros((1, 3, 1), dtype=np.float32)
66+
67+
bonds = guess_bonds(mol)
68+
assert bonds.shape == (0, 2)
69+
assert bonds.dtype == np.uint32
70+
71+
4772
def _test_solvated_bond_guessing():
4873
from moleculekit.molecule import Molecule, calculateUniqueBonds
4974
from moleculekit.bondguesser import guess_bonds

0 commit comments

Comments
 (0)