python code
|
# a descriptor is a semicolon-separated string of individual descriptors
# an individual descriptor is of the form : tag:atName(,atName)*
# the tags must appear in the order given in validTags,
# and the atoms must be present
# examples: 'prot:HD1' 'prot:HD1,HO;deprot:H1;link:SG' 'stereo_2:C1'
# 'link:C2_2'
# The 'link' tag is an exception, in that the 'atNames' are actually
# LinkEnd.linkCodes. These are atom names, but may in some cases have a '_n'
# suffix where n is an integer. The linkCodes must correspond to linkEnds present in the ChemCompVar
# For all other atNames after the tag, atom (name=atName)
# must be present in the ChemCompVar
# 'stereo' tags are of the form stereo_n, where n is a subType no.
# and are the only tags to contain an underscore
# Here, for all atNames after the tag, atom (name=atName, subType=subTypeNo)
# must be present in the ChemCompVar. The interpretation is that these
# atom subtypes are chosen to give the correct stereochemistry; what that
# stereochemistry is can be seen by examining the atom network.
# Validate `value` as a ChemCompVar descriptor string.
# `value` and `self` are not defined in this snippet and must be supplied by
# the enclosing scope. The outcome is left in `isValid`: it stays True when
# `value` is None, is one of validDescriptors, or passes every check below.
# The first problem found is reported with print() and stops the validation.

# valid special descriptor
validDescriptors = ('neutral',)
# valid tags (key) and the order they must appear in (value)
validTags = {'prot': 0, 'deprot': 1, 'link': 2, 'stereo': 3}
isValid = True
if value is not None and value not in validDescriptors:
    # Split the names in chemAtoms into ChemAtom names and all the others
    # (the latter are treated as LinkAtom names by the 'link' check below).
    chemAtomNames = set()
    linkAtomNames = set()
    for ca in self.chemComp.chemAtoms:
        if isinstance(ca, ChemAtom):
            chemAtomNames.add(ca.name)
        else:
            linkAtomNames.add(ca.name)

    # Order value of the tag type seen most recently, and the subtype number
    # of the previous individual descriptor.
    tagIndex = -1
    lastSubNo = 0
    for desc in value.split(';'):
        try:
            (tag, text) = desc.split(':', 1)
        except ValueError:
            # no colon in descriptor
            print("Malformed descriptor %s: %s does not contain ':'" % (value, repr(desc)))
            isValid = False
            break

        # check that tags come in order,
        # and that _subType come in order for each tag type
        splitTag = tag.split('_')
        tag = splitTag[0]
        if len(splitTag) > 1:
            if tag == 'stereo':
                try:
                    subNo = int(splitTag[1])
                except ValueError:
                    # a non-numeric subtype is a malformed descriptor,
                    # not a reason to abort with an exception
                    print(
                        "Malformed descriptor %s: 'stereo' subtype %s is not an integer"
                        % (value, repr(splitTag[1]))
                    )
                    isValid = False
                    break
            else:
                print(
                    "Malformed descriptor %s: only 'stereo' tag may contain underscore'"
                    % value
                )
                isValid = False
                break
        else:
            subNo = 0

        ii = validTags.get(tag)
        if ii is None:
            # tag validity
            print("Malformed descriptor %s: %s is not a valid tag" % (value, repr(tag)))
            isValid = False
            break
        elif ii < tagIndex:
            # tag order
            print(
                "Malformed descriptor %s: tag %s appears out of sequence"
                % (value, repr(tag))
            )
            isValid = False
            break
        elif ii == tagIndex:
            if subNo <= lastSubNo:
                # tags with subNo not presented in order
                # (this also rejects a repeated tag without subtype)
                print("Malformed descriptor %s: %s is out of order" % (value, repr(desc)))
                isValid = False
                break
        else:
            # moved on to next type of tag
            tagIndex = ii
        # remember the subtype of every descriptor so that the next one of
        # the same tag type is compared against its immediate predecessor
        lastSubNo = subNo

        if tag == 'link':
            if not self.chemComp.isLinearPolymer:
                print(
                    "Malformed descriptor %s: only linear polymer ChemComps may have 'link' descriptors"
                    % (value,)
                )
                isValid = False
                break

            linkCodes = text.split(',')

            # 'next' and 'prev' may not be listed in a 'link:' section
            for ss in ('next', 'prev'):
                if ss in linkCodes:
                    print("Malformed descriptor %s: 'link:' section contains %s" % (value, ss))
                    isValid = False
                    break
            if not isValid:
                # stop here: the checks below would only add follow-on
                # messages for a descriptor already known to be invalid
                break

            # every linkCode must match a LinkEnd of the ChemComp
            chemComp = self.chemComp
            for lc in linkCodes:
                if chemComp.findFirstLinkEnd(linkCode=lc) is None:
                    print(
                        "Malformed descriptor %s: 'link:' %s is not a known linkCode"
                        % (value, lc)
                    )
                    isValid = False
                    break
            if not isValid:
                break

            # The part of each linkCode before the first '-' is kept as the
            # atom name; a part after the '-' must be one of linkAtomNames.
            # NOTE(review): the format notes preceding this code describe a
            # '_n' suffix on linkCodes, while this code splits on '-' and the
            # resulting names are later checked against chemAtomNames -
            # confirm which linkCode format is intended.
            atNames = []
            for ss in linkCodes:
                tt = ss.split('-', 1)
                atNames.append(tt[0])
                if len(tt) > 1 and tt[1] not in linkAtomNames:
                    print(
                        "Malformed descriptor %s: atom name %s is not a LinkAtom name for ChemCompVar"
                        % (value, tt[1])
                    )
                    isValid = False
                    break
            if not isValid:
                break
        else:
            atNames = text.split(',')

        if tag == 'stereo':
            # stereo atoms must exist with the given subtype number
            for aa in atNames:
                if self.findFirstChemAtom(name=aa, subType=subNo) is None:
                    print(
                        "Malformed descriptor %s: atom name %s subtype %s is not in ChemCompVar"
                        % (value, aa, subNo)
                    )
                    isValid = False
                    break
        else:
            # all other atom names only need to match a ChemAtom name
            for aa in atNames:
                if aa not in chemAtomNames:
                    print(
                        "Malformed descriptor %s: atom name %s is not in ChemCompVar"
                        % (value, aa)
                    )
                    isValid = False
                    break

        if not isValid:
            # the breaks above only left the inner atom-name loops
            break
|