-
Notifications
You must be signed in to change notification settings - Fork 253
Expand file tree
/
Copy pathdraw.py
More file actions
1883 lines (1699 loc) · 82.1 KB
/
draw.py
File metadata and controls
1883 lines (1699 loc) · 82.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
###############################################################################
# #
# RMG - Reaction Mechanism Generator #
# #
# Copyright (c) 2002-2023 Prof. William H. Green (whgreen@mit.edu), #
# Prof. Richard H. West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) #
# #
# Permission is hereby granted, free of charge, to any person obtaining a #
# copy of this software and associated documentation files (the 'Software'), #
# to deal in the Software without restriction, including without limitation #
# the rights to use, copy, modify, merge, publish, distribute, sublicense, #
# and/or sell copies of the Software, and to permit persons to whom the #
# Software is furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in #
# all copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING #
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER #
# DEALINGS IN THE SOFTWARE. #
# #
###############################################################################
"""
This module provides functionality for automatic two-dimensional drawing of the
`skeletal formulae <http://en.wikipedia.org/wiki/Skeletal_formula>`_ of a wide
variety of organic and inorganic molecules. The general method for creating
these drawings is to utilize the :meth:`draw()` method of the :class:`Molecule`
you wish to draw; this wraps a call to :meth:`MoleculeDrawer.draw()`, where the
molecule drawing algorithm begins. Advanced use may require use of the
:class:`MoleculeDrawer` class directly.
The `Cairo <http://cairographics.org/>`_ 2D graphics library is used to create
the drawings. The :class:`MoleculeDrawer` class module will fail gracefully if
Cairo is not installed.
The implementation uses the 2D coordinate generation of RDKit to find the atom
coordinates, then uses Cairo to render the molecule.
"""
import logging
import math
import os.path
import re
import itertools
try:
import cairocffi as cairo
except ImportError:
try:
import cairo
except ImportError:
cairo = None
import numpy as np
from rdkit.Chem import AllChem
from rmgpy.molecule.molecule import Atom, Molecule, Bond
from rmgpy.molecule.pathfinder import find_shortest_path
from rmgpy.qm.molecule import Geometry
################################################################################
def create_new_surface(file_format, target=None, width=1024, height=768):
    """
    Create a new Cairo surface of the specified `file_format`:

        "png" for :class:`ImageSurface`
        "svg" for :class:`SVGSurface`
        "pdf" for :class:`PDFSurface`
        "ps" for :class:`PSSurface`

    `file_format` is matched case-insensitively.

    For "svg", "pdf" and "ps" the surface is bound to the `target` parameter,
    which can be a path to save the surface to, or file-like object with a
    `write()` method. For "png" an in-memory image surface is created and
    `target` is not used by this function.

    You can also optionally specify the `width` and `height` of the generated
    surface if you know what it is; otherwise a default size of 1024 by 768 is
    used. For "png" both are truncated to integers.

    Raises :class:`ValueError` if `file_format` is not one of the four
    supported values.
    """
    file_format = file_format.lower()
    if file_format == 'png':
        surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, int(width), int(height))
    elif file_format == 'svg':
        surface = cairo.SVGSurface(target, width, height)
    elif file_format == 'pdf':
        surface = cairo.PDFSurface(target, width, height)
    elif file_format == 'ps':
        surface = cairo.PSSurface(target, width, height)
    else:
        # Report the offending value itself (the original formatted the
        # builtin ``type`` into the message, which hid what was passed in).
        raise ValueError(
            'Invalid value "{0}" for file_format parameter; valid values are '
            '"png", "svg", "pdf", and "ps".'.format(file_format))
    return surface
################################################################################
class AdsorbateDrawingError(Exception):
    """Raised when something goes wrong while trying to draw an adsorbate."""
class MoleculeDrawer(object):
"""
This class provides functionality for drawing the skeletal formula of
molecules using the Cairo 2D graphics engine. The most common use case is
simply::
MoleculeDrawer().draw(molecule, file_format='png', target='molecule.png')
where ``molecule`` is the :class:`Molecule` object to draw. You can also
pass a dict of options to the constructor to affect how the molecules are
drawn.
"""
def __init__(self, options=None):
    """
    Set up the drawer.

    The default rendering options are used unless overridden by the
    key-value pairs in `options`. All per-drawing state starts out empty.
    """
    settings = dict(
        fontFamily='sans',
        fontSizeNormal=12,
        fontSizeSubscript=8,
        bondLength=24,
        padding=2,
    )
    if options:
        settings.update(options)
    self.options = settings

    # Molecule being drawn and the data derived from it
    self.molecule = self.cycles = self.ringSystems = None
    self.coordinates = self.symbols = self.implicitHydrogens = None

    # Bounding box of the drawing
    self.left = self.top = 0.0
    self.right = self.bottom = 0.0

    # Cairo surface and context of the most recent drawing
    self.surface = self.cr = None
def clear(self):
    """
    Reset all per-drawing state (molecule, derived data, bounding box and
    Cairo objects) to its initial empty values. ``self.options`` is kept.
    """
    for name in ('molecule', 'cycles', 'ringSystems', 'coordinates',
                 'symbols', 'implicitHydrogens', 'surface', 'cr'):
        setattr(self, name, None)
    for edge in ('left', 'top', 'right', 'bottom'):
        setattr(self, edge, 0.0)
def draw(self, molecule, file_format, target=None):
    """
    Draw the given `molecule` using the given image `file_format` - pdf, svg,
    ps, or png. If `target` is given, the drawing is written to it: a string
    ending in ``.png`` is saved with ``write_to_png``; for any other target
    the Cairo surface is finished. With ``target=None`` the surface is left
    unfinished. Drawing options are taken from ``self.options``.

    This function returns the Cairo surface and context used to create the
    drawing, as well as a bounding box for the molecule being drawn as the
    tuple (`left`, `top`, `width`, `height`).

    If coordinate generation fails with a :class:`ValueError` or
    :class:`numpy.linalg.LinAlgError`, the error is logged and
    ``(None, None, None)`` is returned. A :class:`KeyError` is logged and
    re-raised. If Cairo is not available an error is logged and the function
    returns ``None``.
    """
    # The Cairo 2D graphics library (and its Python wrapper) is required for
    # the molecule drawing algorithm
    if cairo is None:
        logging.error('Cairo not found; molecule will not be drawn.')
        return

    # Make a copy of the molecule so we don't modify the original
    self.molecule = molecule.copy(deep=True)

    # Remove all unlabeled hydrogen atoms from the copied atoms and bonds, as
    # they are not drawn
    # However, if this would remove all atoms, then don't remove any
    atoms_to_remove = []
    self.implicitHydrogens = {}
    surface_sites = []
    for atom in self.molecule.atoms:
        if isinstance(atom, Atom) and atom.is_hydrogen() and atom.label == '':
            # Hydrogens taking part in a hydrogen bond stay explicit
            if not any(bond.is_hydrogen_bond() for bond in atom.bonds.values()):
                atoms_to_remove.append(atom)
        elif atom.is_surface_site():
            surface_sites.append(atom)
    if len(atoms_to_remove) < len(self.molecule.atoms) - len(surface_sites):
        for atom in atoms_to_remove:
            # Count the removed hydrogen on each of its neighbours
            for atom2 in atom.bonds:
                self.implicitHydrogens[atom2] = self.implicitHydrogens.get(atom2, 0) + 1
            self.molecule.remove_atom(atom)

    # Generate information about any cycles present in the molecule, as
    # they will need special attention
    self._find_ring_groups()

    # Handle carbon monoxide special case
    if self.molecule.get_formula() == 'CO' and len(atoms_to_remove) == 0:
        # RDKit does not accept atom type O4tc
        # Iterate over a snapshot: removing atoms changes self.molecule.atoms
        for atom in list(self.molecule.atoms):
            if atom.symbol == 'O':
                self.molecule.remove_atom(atom)
        self.symbols = ['CO']
        self.molecule.atoms[0].charge = 0  # don't label the C as - if you're not drawing the O with a +
        self.coordinates = np.array([[0, 0]], float)
    else:
        # Generate the coordinates to use to draw the molecule
        try:
            # before getting coordinates, make all bonds single and then
            # replace the bonds after generating coordinates. This avoids
            # bugs with RDKit
            old_bond_dictionary = self._make_single_bonds()
            if molecule.contains_surface_site():
                try:
                    self._connect_surface_sites()
                    self._generate_coordinates()
                    self._disconnect_surface_sites()
                except AdsorbateDrawingError:
                    # Fall back to a layout that leaves the sites where the
                    # coordinate generator put them
                    self._disconnect_surface_sites()
                    self._generate_coordinates(fix_surface_sites=False)
            else:
                self._generate_coordinates()
            self._replace_bonds(old_bond_dictionary)

            # Generate labels to use
            self._generate_atom_labels()

        except (ValueError, np.linalg.LinAlgError) as e:
            logging.error('Error while drawing molecule {0}: {1}'.format(molecule.to_smiles(), e))
            import traceback
            traceback.print_exc()
            return None, None, None
        except KeyError:
            logging.error('KeyError occured when drawing molecule, likely because'
                          ' the molecule contained non-standard bond orders in the'
                          ' get_resonance_hybrid method. These cannot be drawn since'
                          ' they cannot be sent to RDKit for coordinate placing.')
            raise

        # Flip the y axis and scale to the requested bond length
        self.coordinates[:, 1] *= -1
        self.coordinates *= self.options['bondLength']

        # Handle some special cases
        if self.symbols == ['H', 'H']:
            # Render as H2 instead of H-H
            self.molecule.remove_atom(self.molecule.atoms[-1])
            self.symbols = ['H2']
            self.coordinates = np.array([[0, 0]], float)
        elif molecule.is_isomorphic(Molecule(smiles='[O][O]')):
            # Render as O2 instead of O-O
            self.molecule.remove_atom(self.molecule.atoms[-1])
            self.molecule.atoms[0].radical_electrons = 0
            self.symbols = ['O2']
            self.coordinates = np.array([[0, 0]], float)
        elif self.symbols == ['OH', 'O'] or self.symbols == ['O', 'OH']:
            # Render as HO2 instead of HO-O or O-OH
            self.molecule.remove_atom(self.molecule.atoms[-1])
            self.symbols = ['O2H']
            self.coordinates = np.array([[0, 0]], float)
        elif self.symbols == ['OH', 'OH']:
            # Render as H2O2 instead of HO-OH
            self.molecule.remove_atom(self.molecule.atoms[-1])
            self.symbols = ['O2H2']
            self.coordinates = np.array([[0, 0]], float)
        elif self.symbols == ['O', 'C', 'O']:
            # Render as CO2 instead of O=C=O
            self.molecule.remove_atom(self.molecule.atoms[0])
            self.molecule.remove_atom(self.molecule.atoms[-1])
            self.symbols = ['CO2']
            self.coordinates = np.array([[0, 0]], float)
        elif self.symbols == ['H', 'H', 'X']:
            # Render as H2::X instead of crashing on H-H::X (vdW bond)
            self.molecule.remove_atom(self.molecule.atoms[0])
            self.symbols = ['H2', 'X']
            self.coordinates = np.array([[0, -0.5], [0, 0.5]], float) * self.options['bondLength']

    # Create a dummy surface to draw to, since we don't know the bounding rect
    # We will copy this to another surface with the correct bounding rect
    surface0 = create_new_surface(file_format=file_format, target=None)
    cr0 = cairo.Context(surface0)

    # Render using Cairo
    self.render(cr0)

    # Create the real surface with the appropriate size
    xoff = self.left
    yoff = self.top
    width = self.right - self.left
    height = self.bottom - self.top
    self.surface = create_new_surface(file_format=file_format, target=target, width=width, height=height)
    self.cr = cairo.Context(self.surface)

    # Draw white background
    self.cr.set_source_rgba(1.0, 1.0, 1.0, 1.0)
    self.cr.paint()

    self.render(self.cr, offset=(-xoff, -yoff))

    if target is not None:
        # Finish Cairo drawing
        # Save PNG of drawing if appropriate
        if isinstance(target, str):
            ext = os.path.splitext(target)[1].lower()
            if ext == '.png':
                self.surface.write_to_png(target)
            else:
                self.surface.finish()
        else:
            self.surface.finish()

    return self.surface, self.cr, (xoff, yoff, width, height)
def _find_ring_groups(self):
    """
    Find all of the cycles in the current molecule, and group them into
    sets of adjacent cycles.

    ``self.cycles`` is set to the molecule's smallest set of smallest rings
    and ``self.ringSystems`` to a list of ring systems, each a list of
    cycles that share atoms with one another. When a cycle shares atoms
    with more than one existing ring system, those systems are merged so
    that each atom belongs to at most one ring system.
    """
    # Find all of the cycles in the molecule
    self.cycles = self.molecule.get_smallest_set_of_smallest_rings()
    self.ringSystems = []

    for cycle in self.cycles:
        # Every existing ring system that shares at least one atom with this cycle
        touching = [ring_system for ring_system in self.ringSystems
                    if any(atom in ring for ring in ring_system for atom in cycle)]
        if not touching:
            self.ringSystems.append([cycle])
            continue

        # The cycle joins the first system it touches ...
        primary = touching[0]
        primary.append(cycle)
        # ... and bridges any further systems into it. Removal is by identity,
        # not equality, so only the merged lists themselves are dropped.
        for other in touching[1:]:
            primary.extend(other)
            self.ringSystems = [ring_system for ring_system in self.ringSystems
                                if ring_system is not other]
def _generate_coordinates(self, fix_surface_sites=True):
"""
Generate the 2D coordinates to be used when drawing the current
molecule, storing them in ``self.coordinates`` (one row of two values
per atom in ``self.molecule.atoms``).
Molecules with one or two atoms are placed directly. Otherwise RDKit's
2D coordinate generation is used, unless any atom has a non-zero charge,
in which case the coordinates are built up from the molecule's backbone
by this class's own layout methods.
If `fix_surface_sites` is True and the molecule contains surface sites,
the coordinates are then rotated and the site atoms repositioned so that
the sites are placed at the bottom of the molecule. This step raises
:class:`AdsorbateDrawingError` if the resulting adsorbate bond would be
too long or two sites would overlap.
Only the one- and two-atom branches (and the single-atom surface check)
return the coordinate array explicitly.
"""
atoms = self.molecule.atoms
natoms = len(atoms)
# Initialize array of coordinates
self.coordinates = coordinates = np.zeros((natoms, 2))
# If there are only one or two atoms to draw, then determining the
# coordinates is trivial
if natoms == 1:
self.coordinates[0, :] = [0.0, 0.0]
return self.coordinates
elif natoms == 2:
# A surface site is put at the larger y value of a vertical pair;
# otherwise the two atoms are placed side by side
if atoms[0].is_surface_site():
self.coordinates[0, :] = [0.0, -0.5]
self.coordinates[1, :] = [0.0, 0.5]
elif atoms[1].is_surface_site():
self.coordinates[0, :] = [0.0, 0.5]
self.coordinates[1, :] = [0.0, -0.5]
else:
self.coordinates[0, :] = [-0.5, 0.0]
self.coordinates[1, :] = [0.5, 0.0]
return self.coordinates
# Decide whether we can use RDKit or have to generate coordinates ourselves
# (RDKit is skipped as soon as any atom carries a non-zero charge)
for atom in self.molecule.atoms:
if atom.charge != 0:
use_rdkit = False
break
else: # didn't break
use_rdkit = True
if not use_rdkit:
if len(self.cycles) > 0:
# Cyclic molecule
backbone = self._find_cyclic_backbone()
self._generate_ring_system_coordinates(backbone)
# Flatten backbone so that it contains a list of the atoms in the
# backbone, rather than a list of the cycles in the backbone
backbone = list(set([atom for cycle in backbone for atom in cycle]))
else:
# Straight chain molecule
backbone = self._find_straight_chain_backbone()
self._generate_straight_chain_coordinates(backbone)
# If backbone is linear, then rotate so that the bond is parallel to the
# horizontal axis
vector0 = coordinates[atoms.index(backbone[1]), :] - coordinates[atoms.index(backbone[0]), :]
for i in range(2, len(backbone)):
vector = coordinates[atoms.index(backbone[i]), :] - coordinates[atoms.index(backbone[i - 1]), :]
if np.linalg.norm(vector - vector0) > 1e-4:
break
else:
# Reached only when every backbone bond vector equals the first one
angle = math.atan2(vector0[0], vector0[1]) - math.pi / 2
rot = np.array([[math.cos(angle), math.sin(angle)],
[-math.sin(angle), math.cos(angle)]], float)
# need to keep self.coordinates and coordinates referring to the same object
self.coordinates = coordinates = np.dot(coordinates, rot)
# If two atoms lie on top of each other, push them apart a bit
# This is ugly, but at least the mess you end up with isn't as misleading
# as leaving everything piled on top of each other at the origin
for atom1, atom2 in itertools.combinations(backbone, 2):
i1, i2 = atoms.index(atom1), atoms.index(atom2)
if np.linalg.norm(coordinates[i1, :] - coordinates[i2, :]) < 0.5:
coordinates[i1, 0] -= 0.3
coordinates[i2, 0] += 0.3
coordinates[i1, 1] -= 0.2
coordinates[i2, 1] += 0.2
# Center backbone at origin
# NOTE(review): the extents are taken over every row of `coordinates`,
# including atoms that have not been placed yet (still at the origin),
# while only the backbone atoms are shifted
xmin = np.min(coordinates[:, 0])
xmax = np.max(coordinates[:, 0])
ymin = np.min(coordinates[:, 1])
ymax = np.max(coordinates[:, 1])
xmid = 0.5 * (xmax + xmin)
ymid = 0.5 * (ymax + ymin)
for atom in backbone:
index = atoms.index(atom)
coordinates[index, 0] -= xmid
coordinates[index, 1] -= ymid
# We now proceed by calculating the coordinates of the functional groups
# attached to the backbone
# Each functional group is independent, although they may contain further
# branching and cycles
# In general substituents should try to grow away from the origin to
# minimize likelihood of overlap
self._generate_neighbor_coordinates(backbone)
else:
# Use RDKit 2D coordinate generation:
# Generate the RDKit molecule from the RMG molecule, use geometry
# in order to match the atoms in the rdmol with the atoms in the
# RMG molecule (which is required to extract coordinates).
self.geometry = Geometry(None, None, self.molecule, None)
rdmol, rd_atom_idx = self.geometry.rd_build()
AllChem.Compute2DCoords(rdmol)
# Extract the coordinates from each atom.
# NOTE(review): the RDKit atom index is also used as the row of
# `coordinates`, which assumes rd_atom_idx numbers the atoms in the
# same order as self.molecule.atoms - confirm against rd_build()
for atom in atoms:
index = rd_atom_idx[atom]
point = rdmol.GetConformer(0).GetAtomPosition(index)
# RDKit positions are scaled by 0.6 (presumably to match the unit
# bond length used by the other layout code - TODO confirm)
coordinates[index, :] = [point.x * 0.6, point.y * 0.6]
# RDKit generates some molecules more vertically than horizontally,
# Especially linear ones. This will reflect any molecule taller than
# it is wide across the line y=x
ranges = np.ptp(coordinates, axis=0)
if ranges[1] > ranges[0]:
temp = np.copy(coordinates)
coordinates[:, 0] = temp[:, 1]
coordinates[:, 1] = temp[:, 0]
# For surface species
if fix_surface_sites and self.molecule.contains_surface_site():
if len(self.molecule.atoms) == 1:
return coordinates
sites = [atom for atom in self.molecule.atoms if atom.is_surface_site()]
if len(sites) == 1:
# rotate them so the site is at the bottom.
site = sites[0]
if site.bonds:
# Use the first atom bonded to the site to define the rotation
adatom = next(iter(site.bonds))
vector0 = coordinates[atoms.index(site), :] - coordinates[atoms.index(adatom), :]
angle = math.atan2(vector0[0], vector0[1]) - math.pi
rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float)
self.coordinates = coordinates = np.dot(coordinates, rot)
else:
# van der Waals
index = atoms.index(site)
coordinates[index, 1] = min(coordinates[:, 1]) - 0.8 # just move the site down a bit
coordinates[index, 0] = coordinates[:, 0].mean() # and center it
elif len(sites) <= 4:
# Rotate so the line of best fit through the adatoms is horizontal.
# find atoms bonded to sites
# NOTE(review): next(iter(site.bonds)) raises StopIteration for a site
# with no bonds; only the first bonded atom of each site is considered
adatoms = [next(iter(site.bonds)) for site in sites]
adatom_indices = [atoms.index(a) for a in adatoms]
# find the best fit line through the bonded atoms
x = coordinates[adatom_indices, 0]
y = coordinates[adatom_indices, 1]
A = np.vstack([x, np.ones(len(x))]).T
m, c = np.linalg.lstsq(A, y, rcond=None)[0]
# rotate so the line is horizontal
angle = -math.atan(m)
rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float)
self.coordinates = coordinates = np.dot(coordinates, rot)
# if the line is above the middle, flip it
not_site_indices = [atoms.index(a) for a in atoms if not a.is_surface_site()]
if coordinates[adatom_indices, 1].mean() > coordinates[not_site_indices, 1].mean():
coordinates[:, 1] *= -1
x = coordinates[adatom_indices, 0]
y = coordinates[adatom_indices, 1]
# All sites share one y position: at least 0.8 below the lowest adatom
# and at least 0.5 below the lowest non-site atom
site_y_pos = min(min(y) - 0.8, min(coordinates[not_site_indices, 1]) - 0.5)
if max(y) - site_y_pos > 1.5:
raise AdsorbateDrawingError("Adsorbate bond too long")
for x1, x2 in itertools.combinations(x, 2):
if abs(x1 - x2) < 0.2:
raise AdsorbateDrawingError("Sites overlapping")
# Put each site directly below (same x as) its adatom
for site, x_pos in zip(sites, x):
index = atoms.index(site)
coordinates[index, 1] = site_y_pos
coordinates[index, 0] = x_pos
else:
# more than 4 surface sites? leave them alone
pass
def _find_cyclic_backbone(self):
    """
    Return the ring system to use as the "backbone" of a cyclic molecule:
    the entry of ``self.ringSystems`` containing the most distinct atoms.
    When several ring systems tie, the earliest one is returned.
    """
    sizes = []
    for ring_system in self.ringSystems:
        members = {atom for ring in ring_system for atom in ring}
        sizes.append(len(members))

    best = 0
    for position, size in enumerate(sizes):
        # Strictly greater, so the first of several equal maxima wins
        if size > sizes[best]:
            best = position
    return self.ringSystems[best]
def _find_straight_chain_backbone(self):
    """
    Return a list of atoms to use as the "backbone" of a non-cyclic
    molecule: the longest straight chain running between two terminal
    atoms. Among chains of equal length, those with the most carbon atoms
    are preferred, and the first such chain found is returned.
    """
    # Terminal atoms are those with exactly one explicit bond
    ends = [atom for atom in self.molecule.atoms if len(atom.bonds) == 1]
    assert len(ends) >= 2

    # Collect every straight path that starts at a terminal atom
    candidates = []
    for start in ends:
        candidates += self._find_straight_chain_paths([start])

    # Keep only paths that also finish on a terminal atom
    candidates = [chain for chain in candidates if chain[-1] in ends]

    # Keep only the longest paths
    longest = max(len(chain) for chain in candidates)
    candidates = [chain for chain in candidates if len(chain) == longest]

    # Of those, take the first path with the highest carbon count
    carbon_counts = [sum(1 for atom in chain if atom.is_carbon()) for chain in candidates]
    most_carbons = max(carbon_counts)
    backbone = next(chain for chain, n_carbon in zip(candidates, carbon_counts)
                    if n_carbon == most_carbons)

    assert len(backbone) > 1
    assert backbone[0] in ends
    assert backbone[-1] in ends
    return backbone
def _find_straight_chain_paths(self, atoms0):
    """
    Return every path that extends the list of atoms `atoms0` through the
    current molecule. The atoms are assumed to already form a path, with
    ``atoms0[0]`` being a terminal atom. Paths are never extended onto an
    atom that is already in the path or that is part of a cycle. If no
    extension is possible, a copy of `atoms0` is the only path returned.
    """
    tail = atoms0[-1]
    found = []
    for neighbor in tail.bonds:
        if neighbor in atoms0:
            continue
        if self.molecule.is_atom_in_cycle(neighbor):
            continue
        found.extend(self._find_straight_chain_paths(atoms0 + [neighbor]))
    if not found:
        found.append(atoms0[:])
    return found
def _generate_ring_system_coordinates(self, atoms):
"""
For a ring system composed of the given cycles of `atoms` (a list of
cycles, each a list of atoms), update the coordinates of each atom in
the system in ``self.coordinates``.
The largest cycle is laid out first as a regular polygon centered at
the origin with unit side length. The remaining cycles are then
attached one at a time, each time picking the largest cycle that shares
one or two atoms with a cycle that has already been laid out. Cycles
for which no such neighbor exists are left unplaced.
"""
coordinates = self.coordinates
# Work on a copy of the list of cycles, since cycles are removed as they are placed
atoms = atoms[:]
processed = []
# Lay out largest cycle in ring system first
cycle = atoms[0]
for cycle0 in atoms[1:]:
if len(cycle0) > len(cycle):
cycle = cycle0
# Regular polygon: the radius is chosen so that adjacent atoms are 1.0 apart
angle = - 2 * math.pi / len(cycle)
radius = 1.0 / (2 * math.sin(math.pi / len(cycle)))
for i, atom in enumerate(cycle):
index = self.molecule.atoms.index(atom)
coordinates[index, :] = [math.cos(math.pi / 2 + i * angle), math.sin(math.pi / 2 + i * angle)]
coordinates[index, :] *= radius
atoms.remove(cycle)
processed.append(cycle)
# If there are other cycles, then try to lay them out as well
while len(atoms) > 0:
# Find the largest cycle that shares one or two atoms with a ring that's
# already been processed
cycle = None
for cycle0 in atoms:
for cycle1 in processed:
count = sum([1 for atom in cycle0 if atom in cycle1])
if count == 1 or count == 2:
if cycle is None or len(cycle0) > len(cycle): cycle = cycle0
# NOTE(review): this rebinds the loop variable cycle0 to a processed cycle;
# its purpose is unclear and it looks unintended - confirm
cycle0 = cycle1
if cycle is None:
break
atoms.remove(cycle)
# Shuffle atoms in cycle such that the common atoms come first
# Also find the average center of the processed cycles that touch the
# current cycles
found = False
common_atoms = []
count = 0
center0 = np.zeros(2, float)
for cycle1 in processed:
found = False
for atom in cycle1:
if atom in cycle and atom not in common_atoms:
common_atoms.append(atom)
found = True
if found:
center1 = np.zeros(2, float)
for atom in cycle1:
# NOTE(review): the row is looked up by the atom's position within
# cycle1, whereas everywhere else `coordinates` is indexed by
# self.molecule.atoms.index(atom) - likely a bug, confirm
center1 += coordinates[cycle1.index(atom), :]
center1 /= len(cycle1)
center0 += center1
count += 1
center0 /= count
if len(common_atoms) > 1:
index0 = cycle.index(common_atoms[0])
index1 = cycle.index(common_atoms[1])
# If the two common atoms sit at the two ends of the list, rotate the list
# by one so that they become neighbors in list order
if (index0 == 0 and index1 == len(cycle) - 1) or (index1 == 0 and index0 == len(cycle) - 1):
cycle = cycle[-1:] + cycle[0:-1]
if cycle.index(common_atoms[1]) < cycle.index(common_atoms[0]):
cycle.reverse()
index = cycle.index(common_atoms[0])
cycle = cycle[index:] + cycle[0:index]
# Determine center of cycle based on already-assigned positions of
# common atoms (which won't be changed)
if len(common_atoms) == 1 or len(common_atoms) == 2:
# Center of new cycle is reflection of center of adjacent cycle
# across common atom or bond
center = np.zeros(2, float)
for atom in common_atoms:
center += coordinates[self.molecule.atoms.index(atom), :]
center /= len(common_atoms)
vector = center - center0
center += vector
radius = 1.0 / (2 * math.sin(math.pi / len(cycle)))
else:
# Use any three points to determine the point equidistant from these
# three; this is the center
index0 = self.molecule.atoms.index(common_atoms[0])
index1 = self.molecule.atoms.index(common_atoms[1])
index2 = self.molecule.atoms.index(common_atoms[2])
# Linear system A . center = b obtained from |center - p1|^2 = |center - p0|^2
# and |center - p2|^2 = |center - p0|^2
A = np.zeros((2, 2), float)
b = np.zeros((2), float)
A[0, :] = 2 * (coordinates[index1, :] - coordinates[index0, :])
A[1, :] = 2 * (coordinates[index2, :] - coordinates[index0, :])
b[0] = coordinates[index1, 0] ** 2 + coordinates[index1, 1] ** 2 - coordinates[index0, 0] ** 2 - coordinates[index0, 1] ** 2
b[1] = coordinates[index2, 0] ** 2 + coordinates[index2, 1] ** 2 - coordinates[index0, 0] ** 2 - coordinates[index0, 1] ** 2
center = np.linalg.solve(A, b)
radius = np.linalg.norm(center - coordinates[index0, :])
start_angle = 0.0
end_angle = 0.0
if len(common_atoms) == 1:
# We will use the full 360 degrees to place the other atoms in the cycle
start_angle = math.atan2(-vector[1], vector[0])
end_angle = start_angle + 2 * math.pi
elif len(common_atoms) >= 2:
# Divide other atoms in cycle equally among unused angle
# NOTE(review): as above, these rows are looked up by position within
# `cycle` rather than by self.molecule.atoms.index(...) - confirm
vector = coordinates[cycle.index(common_atoms[-1]), :] - center
start_angle = math.atan2(vector[1], vector[0])
vector = coordinates[cycle.index(common_atoms[0]), :] - center
end_angle = math.atan2(vector[1], vector[0])
# Place remaining atoms in cycle
if end_angle < start_angle:
end_angle += 2 * math.pi
d_angle = (end_angle - start_angle) / (len(cycle) - len(common_atoms) + 1)
else:
end_angle -= 2 * math.pi
d_angle = (end_angle - start_angle) / (len(cycle) - len(common_atoms) + 1)
count = 1
for i in range(len(common_atoms), len(cycle)):
angle = start_angle + count * d_angle
index = self.molecule.atoms.index(cycle[i])
# Check that we aren't reassigning any atom positions
# This version assumes that no atoms belong at the origin, which is
# usually fine because the first ring is centered at the origin
if np.linalg.norm(coordinates[index, :]) < 1e-4:
vector = np.array([math.cos(angle), math.sin(angle)], float)
coordinates[index, :] = center + radius * vector
count += 1
# We're done assigning coordinates for this cycle, so mark it as processed
processed.append(cycle)
def _generate_straight_chain_coordinates(self, atoms):
    """
    Update the coordinates for the linear straight chain of `atoms` in
    the current molecule.

    The first atom is placed at the origin and each following atom one
    unit further along a direction vector. The vector is turned at each
    atom by an angle that depends on how many bonds that atom has (and, for
    two bonds, on the bond orders), alternating the turning direction to
    give a zigzag. An atom with a number of bonds outside the range 2-6
    continues straight on.

    Coordinates are written into ``self.coordinates`` at the row given by
    each atom's index in ``self.molecule.atoms``.
    """

    def rotation(theta):
        """Return the 2x2 matrix used by this layout to turn a vector by `theta`."""
        return np.array([[math.cos(theta), math.sin(theta)],
                         [-math.sin(theta), math.cos(theta)]], float)

    coordinates = self.coordinates
    molecule_atoms = self.molecule.atoms

    # First atom goes at origin
    index0 = molecule_atoms.index(atoms[0])
    coordinates[index0, :] = [0.0, 0.0]

    # Second atom goes on x-axis for a triple bond; otherwise the first
    # bond is tilted by 30 degrees to start the zigzag
    index1 = molecule_atoms.index(atoms[1])
    vector = np.array([1.0, 0.0], float)
    if atoms[0].bonds[atoms[1]].is_triple():
        rotate_positive = False
    else:
        rotate_positive = True
        vector = np.dot(rotation(-math.pi / 6), vector)
    coordinates[index1, :] = coordinates[index0, :] + vector

    # Other atoms
    for i in range(2, len(atoms)):
        atom0 = atoms[i - 2]
        atom1 = atoms[i - 1]
        atom2 = atoms[i]
        index1 = molecule_atoms.index(atom1)
        index2 = molecule_atoms.index(atom2)
        bond0 = atom0.bonds[atom1]
        bond = atom1.bonds[atom2]

        # Angle of next bond depends on the number of bonds to the start atom
        num_bonds = len(atom1.bonds)
        if num_bonds == 2:
            if (bond0.is_triple() or bond.is_triple()) or (bond0.is_double() and bond.is_double()):
                # Rotate by 0 degrees towards horizontal axis (to get angle of 180)
                angle = 0.0
            else:
                # Rotate by 60 degrees towards horizontal axis (to get angle of 120)
                angle = math.pi / 3
        elif num_bonds == 3:
            # Rotate by 60 degrees towards horizontal axis (to get angle of 120)
            angle = math.pi / 3
        elif num_bonds == 5:
            # Rotate by 36 degrees towards horizontal axis (to get angle of 144)
            angle = math.pi / 5
        else:
            # Four or six bonds: no rotation. Any other bond count also goes
            # straight on, rather than leaving `angle` unbound or reusing
            # the value from the previous atom.
            angle = 0.0

        # Determine coordinates for atom
        if angle != 0:
            if not rotate_positive:
                angle = -angle
            vector = np.dot(rotation(angle), vector)
            # Alternate the turning direction to produce a zigzag
            rotate_positive = not rotate_positive
        coordinates[index2, :] = coordinates[index1, :] + vector
def _generate_neighbor_coordinates(self, backbone):
    """
    Recursively update the coordinates for the atoms immediately adjacent
    to the atoms in the molecular `backbone`.

    `backbone` is a sequence of atoms that already have coordinates. For
    each backbone atom, every neighbor that is not in `backbone` and whose
    coordinates are still within 1e-4 of the origin is assigned a position,
    and `_generate_functional_group_coordinates` is then called to lay out
    whatever is attached to that neighbor. Coordinates are read from and
    written to `self.coordinates` in place; rows are indexed by the atom's
    position in `self.molecule.atoms`. Nothing is returned.
    """
    atoms = self.molecule.atoms
    coordinates = self.coordinates

    for atom0 in backbone:
        num_bonds = len(atom0.bonds)
        if num_bonds == 0:
            # No neighbors to place; also avoids dividing by zero below
            continue
        index0 = atoms.index(atom0)

        # Determine bond angles of all previously-determined bond locations
        # for this atom (i.e. the bonds to other backbone atoms)
        bond_angles = []
        for atom1 in atom0.bonds:
            if atom1 in backbone:
                vector = coordinates[atoms.index(atom1), :] - coordinates[index0, :]
                bond_angles.append(math.atan2(vector[1], vector[0]))
        bond_angles.sort()

        # The existing bonds are "regular" if every pair of consecutive
        # angles is separated by an integer multiple of the ideal spacing
        best_angle = 2 * math.pi / num_bonds
        regular = True
        for angle1, angle2 in zip(bond_angles[0:-1], bond_angles[1:]):
            if all(abs(angle2 - angle1 - (k + 1) * best_angle) > 1e-4 for k in range(num_bonds)):
                regular = False

        if regular:
            # All the bonds around each atom are equally spaced
            # We just need to fill in the missing bond locations

            # Determine rotation angle and matrix
            rot = np.array([[math.cos(best_angle), -math.sin(best_angle)],
                            [math.sin(best_angle), math.cos(best_angle)]], float)

            # Determine the vector of any currently-existing bond from this
            # atom (the last matching neighbor wins)
            vector = None
            for atom1 in atom0.bonds:
                index1 = atoms.index(atom1)
                if atom1 in backbone or np.linalg.norm(coordinates[index1, :]) > 1e-4:
                    vector = coordinates[index1, :] - coordinates[index0, :]
            if vector is None:
                # No neighbor has been placed yet; fall back to a unit vector
                # along +x so that the rotation below is well defined
                # (otherwise None would be handed to np.dot)
                vector = np.array([1.0, 0.0], float)

            # Iterate through each neighboring atom to this backbone atom
            # If the neighbor is not in the backbone and does not yet have
            # coordinates, then we need to determine coordinates for it
            for atom1 in atom0.bonds:
                index1 = atoms.index(atom1)
                if atom1 not in backbone and np.linalg.norm(coordinates[index1, :]) < 1e-4:
                    occupied = True
                    count = 0
                    # Rotate vector until we find an unoccupied location;
                    # give up after one full turn
                    while occupied and count < num_bonds:
                        count += 1
                        vector = np.dot(rot, vector)
                        occupied = any(
                            np.linalg.norm(
                                coordinates[atoms.index(atom2), :] - coordinates[index0, :] - vector
                            ) < 1e-4
                            for atom2 in atom0.bonds
                        )
                    coordinates[index1, :] = coordinates[index0, :] + vector
                    self._generate_functional_group_coordinates(atom0, atom1)

        else:
            # The bonds are not evenly spaced (e.g. due to a ring)
            # We place all of the remaining bonds evenly over the reflex angle
            start_angle = max(bond_angles)
            end_angle = min(bond_angles)
            # end_angle - start_angle is never positive (min minus max), so
            # only the "less than half a turn apart" case needs unwrapping
            if 0.0 > end_angle - start_angle > -math.pi:
                start_angle -= 2 * math.pi
            d_angle = (end_angle - start_angle) / (num_bonds - len(bond_angles) + 1)

            slot = 1
            for atom1 in atom0.bonds:
                index1 = atoms.index(atom1)
                if atom1 not in backbone and np.linalg.norm(coordinates[index1, :]) < 1e-4:
                    angle = start_angle + slot * d_angle
                    slot += 1
                    vector = np.array([math.cos(angle), math.sin(angle)], float)
                    vector /= np.linalg.norm(vector)
                    coordinates[index1, :] = coordinates[index0, :] + vector
                    self._generate_functional_group_coordinates(atom0, atom1)
def _generate_functional_group_coordinates(self, atom0, atom1):
    """
    For the functional group starting with the bond from `atom0` to `atom1`,
    generate the coordinates of the rest of the functional group. `atom0` is
    treated as if a terminal atom. `atom0` and `atom1` must already have their
    coordinates determined.

    Coordinates are read from and written to `self.coordinates` in place;
    rows are indexed by the atom's position in `self.molecule.atoms`. If
    `atom1` belongs to one of `self.ringSystems`, the whole ring system is
    laid out at once and its neighbors are handled through
    `_generate_neighbor_coordinates`; otherwise each neighbor of `atom1`
    other than `atom0` is placed and this function recurses on it. Nothing
    is returned.
    """
    atoms = self.molecule.atoms
    coordinates = self.coordinates

    index0 = atoms.index(atom0)
    index1 = atoms.index(atom1)

    # Determine the vector of any currently-existing bond from this atom
    # (We use the bond to the previous atom here)
    vector = coordinates[index0, :] - coordinates[index1, :]
    bond_angle = math.atan2(vector[1], vector[0])

    # Check to see if atom1 is in any cycles in the molecule
    # (if several ring systems match, the last one is used)
    ring_system = None
    for ring_sys in self.ringSystems:
        if any(atom1 in ring for ring in ring_sys):
            ring_system = ring_sys

    if ring_system is not None:
        # atom1 is part of a ring system, so we need to process the entire
        # ring system at once

        # Generate coordinates for all atoms in the ring system
        self._generate_ring_system_coordinates(ring_system)

        cycle_atoms = list({atom for ring in ring_system for atom in ring})
        cycle_indices = [atoms.index(atom) for atom in cycle_atoms]

        # Work on a copy that holds only the ring-system rows; every other
        # row stays zero
        coordinates_cycle = np.zeros_like(self.coordinates)
        for row in cycle_indices:
            coordinates_cycle[row, :] = coordinates[row, :]

        # Rotate the ring system coordinates so that the line connecting atom1
        # and the center of mass of the ring is parallel to that between
        # atom0 and atom1
        center = np.zeros(2, float)
        for row in cycle_indices:
            center += coordinates_cycle[row, :]
        center /= len(cycle_atoms)
        vector0 = center - coordinates_cycle[index1, :]
        # NOTE(review): the angle is taken from the difference of the two
        # vectors, not the difference of their directions - confirm that
        # this gives the intended alignment
        angle = math.atan2(vector[1] - vector0[1], vector[0] - vector0[0])
        rot = np.array([[math.cos(angle), -math.sin(angle)], [math.sin(angle), math.cos(angle)]], float)
        coordinates_cycle = np.dot(coordinates_cycle, rot)

        # Translate the ring system coordinates to the position of atom1
        coordinates_cycle += coordinates[index1, :] - coordinates_cycle[index1, :]
        for row in cycle_indices:
            coordinates[row, :] = coordinates_cycle[row, :]

        # Generate coordinates for remaining neighbors of ring system,
        # continuing to recurse as needed
        self._generate_neighbor_coordinates(cycle_atoms)

    else:
        # atom1 is not in any rings, so we can continue as normal

        # Determine rotation angle and matrix
        num_bonds = len(atom1.bonds)
        if num_bonds == 2:
            bond0, bond = atom1.bonds.values()
            if (bond0.is_triple() or bond.is_triple()) or (bond0.is_double() and bond.is_double()):
                # Linear arrangement: continue straight through atom1
                angle = math.pi
            elif bond_angle < -0.5 * math.pi or bond_angle > 0.5 * math.pi:
                # Pick the sign of the 120 degree turn from the direction of
                # the incoming bond, to discourage overlap of functional
                # groups
                angle = 2 * math.pi / 3
            else:
                angle = -(2 * math.pi / 3)
        else:
            angle = 2 * math.pi / num_bonds
        rot = np.array([[math.cos(angle), -math.sin(angle)], [math.sin(angle), math.cos(angle)]], float)

        # Iterate through each neighboring atom to atom1
        # Every neighbor other than atom0 gets coordinates assigned here
        for atom in atom1.bonds:
            if atom is atom0:
                continue
            occupied = True
            count = 0
            # Rotate vector until we find an unoccupied location; give up
            # after one full turn
            while occupied and count < num_bonds:
                count += 1
                vector = np.dot(rot, vector)
                occupied = any(
                    np.linalg.norm(
                        coordinates[atoms.index(atom2), :] - coordinates[index1, :] - vector
                    ) < 1e-4
                    for atom2 in atom1.bonds
                )
            coordinates[atoms.index(atom), :] = coordinates[index1, :] + vector

            # Recursively continue with functional group
            self._generate_functional_group_coordinates(atom1, atom)
def _generate_atom_labels(self):
"""
Generate the labels to use for each atom in the drawing. In general,
all atoms are labeled with their symbols except carbon. Some carbon
atoms are also labeled in certain circumstances. The labels also
contain any implicit hydrogen atoms (i.e. those hydrogen atoms not
explicitly drawn in the skeletal formula).
"""
atoms = self.molecule.atoms
self.symbols = symbols = [atom.symbol for atom in atoms]
for i in range(len(symbols)):
# Don't label carbon atoms, unless there are only one or two heavy atoms
# or they are isotopically labeled
if symbols[i] == 'C' and len(symbols) > 2:
if (len(atoms[i].bonds) > 1 or (atoms[i].radical_electrons == 0 and atoms[i].charge == 0)) \
and atoms[i].element.isotope == -1:
symbols[i] = ''
# Do label atoms that have only double bonds to one or more labeled atoms
changed = True