Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
libcifpp
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
libcifpp
Commits
915d6504
Unverified
Commit
915d6504
authored
Apr 13, 2021
by
Maarten L. Hekkelman
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
First steps, remove CCP4 info from compound
parent
5e63ca7a
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
399 additions
and
1393 deletions
+399
-1393
include/cif++/Cif++.hpp
+29
-7
include/cif++/Compound.hpp
+99
-250
include/cif++/Structure.hpp
+0
-7
src/Compound.cpp
+244
-1075
src/PDB2Cif.cpp
+25
-28
src/Structure.cpp
+2
-26
No files found.
include/cif++/Cif++.hpp
View file @
915d6504
...
...
@@ -289,9 +289,10 @@ namespace detail
void
swap
(
ItemReference
&
b
);
template
<
typename
T
=
std
::
string
>
T
as
()
const
auto
as
()
const
{
return
item_value_as
<
T
>::
convert
(
*
this
);
using
value_type
=
std
::
remove_cv_t
<
std
::
remove_reference_t
<
T
>>
;
return
item_value_as
<
value_type
>::
convert
(
*
this
);
}
template
<
typename
T
>
...
...
@@ -337,11 +338,13 @@ namespace detail
template
<
typename
T
>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_floating_point_v
<
T
>>>
{
static
T
convert
(
const
ItemReference
&
ref
)
using
value_type
=
std
::
remove_reference_t
<
std
::
remove_cv_t
<
T
>>
;
static
value_type
convert
(
const
ItemReference
&
ref
)
{
T
result
=
{};
value_type
result
=
{};
if
(
not
ref
.
empty
())
result
=
static_cast
<
T
>
(
std
::
stod
(
ref
.
c_str
()));
result
=
static_cast
<
value_type
>
(
std
::
stod
(
ref
.
c_str
()));
return
result
;
}
...
...
@@ -376,7 +379,7 @@ namespace detail
};
template
<
typename
T
>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_integral_v
<
T
>
and
std
::
is_unsigned_v
<
T
>>>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_integral_v
<
T
>
and
std
::
is_unsigned_v
<
T
>
and
not
std
::
is_same_v
<
T
,
bool
>
>>
{
static
T
convert
(
const
ItemReference
&
ref
)
{
...
...
@@ -417,7 +420,7 @@ namespace detail
};
template
<
typename
T
>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_integral_v
<
T
>
and
std
::
is_signed_v
<
T
>>>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_integral_v
<
T
>
and
std
::
is_signed_v
<
T
>
and
not
std
::
is_same_v
<
T
,
bool
>
>>
{
static
T
convert
(
const
ItemReference
&
ref
)
{
...
...
@@ -482,6 +485,25 @@ namespace detail
}
};
template
<
typename
T
>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_same_v
<
T
,
bool
>>>
{
static
bool
convert
(
const
ItemReference
&
ref
)
{
bool
result
=
false
;
if
(
not
ref
.
empty
())
result
=
iequals
(
ref
.
c_str
(),
"y"
);
return
result
;
}
static
int
compare
(
const
ItemReference
&
ref
,
bool
value
,
bool
icase
)
{
bool
rv
=
convert
(
ref
);
return
value
&&
rv
?
0
:
(
rv
<
value
?
-
1
:
1
);
}
};
template
<
size_t
N
>
struct
ItemReference
::
item_value_as
<
char
[
N
]
>
{
...
...
include/cif++/Compound.hpp
View file @
915d6504
...
...
@@ -26,10 +26,13 @@
#pragma once
/// \file This file contains the definition for the class Compound, encapsulating
/// the information found for compounds in the CCD.
#include <map>
#include <set>
#include <tuple>
#include <vector>
#include <map>
#include "cif++/AtomType.hpp"
#include "cif++/Cif++.hpp"
...
...
@@ -38,310 +41,156 @@ namespace mmcif
{
// --------------------------------------------------------------------
// The chemical composition of the structure in an mmCIF file is
// defined in the class composition. A compositon consists of
// entities. Each Entity can be either a polymer, a non-polymer
// a macrolide or a water molecule.
// Entities themselves are made up of compounds. And compounds
// contain CompoundAtom records for each atom.
class
Compound
;
class
Link
;
struct
CompoundAtom
;
enum
BondType
{
singleBond
,
doubleBond
,
tripleBond
,
delocalizedBond
};
// --------------------------------------------------------------------
// struct containing information about an atom in a chemical compound
// This information comes from the CCP4 monomer library.
struct
CompoundAtom
{
std
::
string
id
;
AtomType
typeSymbol
;
std
::
string
typeEnergy
;
float
partialCharge
;
};
// --------------------------------------------------------------------
// struct containing information about the bonds
// This information comes from the CCP4 monomer library.
struct
CompoundBond
{
std
::
string
atomID
[
2
];
BondType
type
;
float
distance
;
float
esd
;
};
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct
CompoundAngle
{
std
::
string
atomID
[
3
];
float
angle
;
float
esd
;
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx_v50 file
enum
class
BondType
{
sing
,
// 'single bond'
doub
,
// 'double bond'
trip
,
// 'triple bond'
quad
,
// 'quadruple bond'
arom
,
// 'aromatic bond'
poly
,
// 'polymeric bond'
delo
,
// 'delocalized double bond'
pi
,
// 'pi bond'
};
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
std
::
string
to_string
(
BondType
bondType
);
BondType
from_string
(
const
std
::
string
&
bondType
);
struct
CompoundTorsion
{
std
::
string
atomID
[
4
];
float
angle
;
float
esd
;
int
period
;
};
/// --------------------------------------------------------------------
/// \brief struct containing information about an atom in a chemical compound.
/// This is a subset of the available information. Contact the author if you need more fields.
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct
CompoundPlane
struct
CompoundAtom
{
std
::
string
id
;
std
::
vector
<
std
::
string
>
atomID
;
float
esd
;
std
::
string
id
;
AtomType
typeSymbol
;
int
charge
;
bool
aromatic
;
bool
leavingAtom
;
bool
stereoConfig
;
float
x
,
y
,
z
;
};
// --------------------------------------------------------------------
// struct containing information about a chiral centre
// This information comes from the CCP4 monomer library.
enum
ChiralVolumeSign
{
negativ
,
positiv
,
both
};
/// --------------------------------------------------------------------
/// \brief struct containing information about the bonds
struct
Compound
ChiralCentre
struct
Compound
Bond
{
std
::
string
id
;
std
::
string
atomIDCentre
;
std
::
string
atomID
[
3
];
ChiralVolumeSign
volumeSign
;
std
::
string
atomID
[
2
];
BondType
type
;
bool
aromatic
,
stereoConfig
;
};
// --------------------------------------------------------------------
// a class that contains information about a chemical compound.
// This information is derived from the ccp4 monomer library by default.
// To create compounds, you'd best use the factory method.
/// --------------------------------------------------------------------
/// \brief a class that contains information about a chemical compound.
/// This information is derived from the CDD by default.
///
/// To create compounds, you use the factory method. You can add your own
/// compound definitions by calling the addExtraComponents function and
/// pass it a valid CCD formatted file.
class
Compound
{
public
:
Compound
(
const
std
::
string
&
file
,
const
std
::
string
&
id
,
const
std
::
string
&
name
,
const
std
::
string
&
group
);
// factory method, create a Compound based on the three letter code
// (for amino acids) or the one-letter code (for bases) or the
// code as it is known in the CCP4 monomer library.
static
const
Compound
*
create
(
const
std
::
string
&
id
);
// this second factory method can create a Compound even if it is not
// recorded in the library. It will take the values from the CCP4 lib
// unless the value passed to this function is not empty.
static
const
Compound
*
create
(
const
std
::
string
&
id
,
const
std
::
string
&
name
,
const
std
::
string
&
type
,
const
std
::
string
&
formula
);
// add an additional path to the monomer library.
static
void
addMonomerLibraryPath
(
const
std
::
string
&
dir
);
// accessors
std
::
string
id
()
const
{
return
mID
;
}
std
::
string
name
()
const
{
return
mName
;
}
std
::
string
type
()
const
;
std
::
string
group
()
const
{
return
mGroup
;
}
std
::
vector
<
CompoundAtom
>
atoms
()
const
{
return
mAtoms
;
}
std
::
vector
<
CompoundBond
>
bonds
()
const
{
return
mBonds
;
}
std
::
vector
<
CompoundAngle
>
angles
()
const
{
return
mAngles
;
}
std
::
vector
<
CompoundChiralCentre
>
chiralCentres
()
const
{
return
mChiralCentres
;
}
std
::
vector
<
CompoundPlane
>
planes
()
const
{
return
mPlanes
;
}
std
::
vector
<
CompoundTorsion
>
torsions
()
const
{
return
mTorsions
;
}
CompoundAtom
getAtomByID
(
const
std
::
string
&
atomID
)
const
;
bool
atomsBonded
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
;
float
atomBondValue
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
;
float
bondAngle
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
,
const
std
::
string
&
atomId_3
)
const
;
float
chiralVolume
(
const
std
::
string
&
centreID
)
const
;
std
::
string
formula
()
const
;
float
formulaWeight
()
const
;
int
charge
()
const
;
bool
isWater
()
const
;
bool
isSugar
()
const
;
std
::
vector
<
std
::
string
>
isomers
()
const
;
bool
isIsomerOf
(
const
Compound
&
c
)
const
;
std
::
vector
<
std
::
tuple
<
std
::
string
,
std
::
string
>>
mapToIsomer
(
const
Compound
&
c
)
const
;
private
:
Compound
(
cif
::
Datablock
&
db
);
~
Compound
();
cif
::
File
mCF
;
std
::
string
mID
;
std
::
string
mName
;
std
::
string
mGroup
;
std
::
vector
<
CompoundAtom
>
mAtoms
;
std
::
vector
<
CompoundBond
>
mBonds
;
std
::
vector
<
CompoundAngle
>
mAngles
;
std
::
vector
<
CompoundTorsion
>
mTorsions
;
std
::
vector
<
CompoundChiralCentre
>
mChiralCentres
;
std
::
vector
<
CompoundPlane
>
mPlanes
;
};
// --------------------------------------------------------------------
// struct containing information about the bonds
// This information comes from the CCP4 monomer library.
struct
LinkAtom
{
int
compID
;
std
::
string
atomID
;
bool
operator
==
(
const
LinkAtom
&
rhs
)
const
{
return
compID
==
rhs
.
compID
and
atomID
==
rhs
.
atomID
;
}
};
struct
LinkBond
{
LinkAtom
atom
[
2
];
BondType
type
;
float
distance
;
float
esd
;
};
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct
LinkAngle
{
LinkAtom
atom
[
3
];
float
angle
;
float
esd
;
};
// --------------------------------------------------------------------
// struct containing information about the bond-torsions
// This information comes from the CCP4 monomer library.
/// \brief factory method, create a Compound based on the three letter code
/// (for amino acids) or the one-letter code (for bases) or the
/// code as it is known in the CCD.
struct
LinkTorsion
{
LinkAtom
atom
[
4
];
float
angle
;
float
esd
;
int
period
;
};
static
const
Compound
*
create
(
const
std
::
string
&
id
);
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
// /// this second factory method can create a Compound even if it is not
// /// recorded in the library. It will take the values from the CCP4 lib
// /// unless the value passed to this function is not empty.
// static const Compound* create(const std::string& id, const std::string& name,
// const std::string& type, const std::string& formula);
struct
LinkPlane
{
std
::
string
id
;
std
::
vector
<
LinkAtom
>
atoms
;
float
esd
;
};
/// \brief Create compounds based on the data in the file \a components
///
/// It is often required to add information about unknown components.
/// This file parses either a CCP4 or a CCD formatted components file
///
/// \param components The mmCIF file containing the components
/// \result An array containing the ID's of the added components
static
std
::
vector
<
std
::
string
>
addExtraComponents
(
const
std
::
filesystem
::
path
&
components
);
// --------------------------------------------------------------------
// struct containing information about a chiral centre
// This information comes from the CCP4 monomer library.
// accessors
struct
LinkChiralCentre
{
std
::
string
id
;
LinkAtom
atomCentre
;
LinkAtom
atom
[
3
];
ChiralVolumeSign
volumeSign
;
};
std
::
string
id
()
const
{
return
mID
;
}
std
::
string
name
()
const
{
return
mName
;
}
std
::
string
type
()
const
{
return
mType
;
}
std
::
string
formula
()
const
{
return
mFormula
;
}
float
formulaWeight
()
const
{
return
mFormulaWeight
;
}
int
formalCharge
()
const
{
return
mFormalCharge
;
}
// --------------------------------------------------------------------
// a class that contains information about a chemical link between compounds.
// This information is derived from the ccp4 monomer library by default.
const
std
::
vector
<
CompoundAtom
>
&
atoms
()
const
{
return
mAtoms
;
}
const
std
::
vector
<
CompoundBond
>
&
bonds
()
const
{
return
mBonds
;
}
class
Link
{
public
:
CompoundAtom
getAtomByID
(
const
std
::
string
&
atomID
)
const
;
Link
(
cif
::
Datablock
&
db
);
bool
atomsBonded
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
;
// float atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const;
// float bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const;
// float chiralVolume(const std::string ¢reID) const;
// Factory method.
static
const
Link
&
create
(
const
std
::
string
&
id
);
bool
isWater
()
const
{
return
mID
==
"HOH"
or
mID
==
"H2O"
or
mID
==
"WAT"
;
}
// accessors
std
::
string
id
()
const
{
return
mID
;
}
std
::
vector
<
LinkBond
>
bonds
()
const
{
return
mBonds
;
}
std
::
vector
<
LinkAngle
>
angles
()
const
{
return
mAngles
;
}
std
::
vector
<
LinkChiralCentre
>
chiralCentres
()
const
{
return
mChiralCentres
;
}
std
::
vector
<
LinkPlane
>
planes
()
const
{
return
mPlanes
;
}
std
::
vector
<
LinkTorsion
>
torsions
()
const
{
return
mTorsions
;
}
float
atomBondValue
(
const
LinkAtom
&
atomId_1
,
const
LinkAtom
&
atomId_2
)
const
;
float
bondAngle
(
const
LinkAtom
&
atomId_1
,
const
LinkAtom
&
atomId_2
,
const
LinkAtom
&
atomId_3
)
const
;
float
chiralVolume
(
const
std
::
string
&
id
)
const
;
private
:
~
Link
()
;
std
::
string
mID
;
std
::
vector
<
LinkBond
>
mBonds
;
std
::
vector
<
LinkAngle
>
mAngles
;
std
::
vector
<
LinkTorsion
>
mTorsions
;
std
::
vector
<
LinkChiralCentre
>
mChiralCentre
s
;
std
::
vector
<
LinkPlane
>
mPlane
s
;
std
::
string
mID
;
std
::
string
mName
;
std
::
string
mType
;
std
::
string
mFormula
;
float
mFormulaWeight
;
int
mFormalCharge
;
std
::
vector
<
CompoundAtom
>
mAtom
s
;
std
::
vector
<
CompoundBond
>
mBond
s
;
};
// --------------------------------------------------------------------
// Factory class for Compound and Link objects
extern
const
std
::
map
<
std
::
string
,
char
>
kAAMap
,
kBaseMap
;
extern
const
std
::
map
<
std
::
string
,
char
>
kAAMap
,
kBaseMap
;
class
CompoundFactory
{
public
:
static
void
init
(
bool
useThreadLocalInstanceOnly
);
static
CompoundFactory
&
instance
();
static
CompoundFactory
&
instance
();
static
void
clear
();
void
pushDictionary
(
const
std
::
string
&
inDictFile
);
void
pushDictionary
(
const
std
::
string
&
inDictFile
);
void
popDictionary
();
bool
isKnownPeptide
(
const
std
::
string
&
res_name
)
const
;
bool
isKnownBase
(
const
std
::
string
&
res_name
)
const
;
bool
isKnownPeptide
(
const
std
::
string
&
res_name
)
const
;
bool
isKnownBase
(
const
std
::
string
&
res_name
)
const
;
std
::
string
unalias
(
const
std
::
string
&
res_name
)
const
;
const
Compound
*
get
(
std
::
string
id
);
const
Compound
*
create
(
std
::
string
id
);
const
Link
*
getLink
(
std
::
string
id
);
const
Link
*
createLink
(
std
::
string
id
);
const
Compound
*
get
(
std
::
string
id
);
const
Compound
*
create
(
std
::
string
id
);
~
CompoundFactory
();
private
:
CompoundFactory
();
CompoundFactory
(
const
CompoundFactory
&
)
=
delete
;
CompoundFactory
&
operator
=
(
const
CompoundFactory
&
)
=
delete
;
static
CompoundFactory
*
sInstance
;
CompoundFactory
(
const
CompoundFactory
&
)
=
delete
;
CompoundFactory
&
operator
=
(
const
CompoundFactory
&
)
=
delete
;
static
CompoundFactory
*
sInstance
;
static
thread_local
std
::
unique_ptr
<
CompoundFactory
>
tlInstance
;
static
bool
sUseThreadLocalInstance
;
class
CompoundFactoryImpl
*
mImpl
;
class
CompoundFactoryImpl
*
mImpl
;
};
}
}
// namespace mmcif
include/cif++/Structure.hpp
View file @
915d6504
...
...
@@ -141,9 +141,6 @@ class Atom
// access data in compound for this atom
// the energy-type field
std
::
string
energyType
()
const
;
// convenience routine
bool
isBackBone
()
const
{
...
...
@@ -243,10 +240,6 @@ class Residue
bool
isWater
()
const
{
return
mCompoundID
==
"HOH"
;
}
bool
isSugar
()
const
;
bool
isPyranose
()
const
;
bool
isFuranose
()
const
;
const
Structure
&
structure
()
const
{
return
*
mStructure
;
}
bool
empty
()
const
{
return
mStructure
==
nullptr
;
}
...
...
src/Compound.cpp
View file @
915d6504
...
...
@@ -29,8 +29,8 @@
#endif
#include <map>
#include <numeric>
#include <mutex>
#include <numeric>
#include <shared_mutex>
#include <boost/algorithm/string.hpp>
...
...
@@ -39,9 +39,9 @@
#include <fstream>
#include "cif++/Cif++.hpp"
#include "cif++/Point.hpp"
#include "cif++/Compound.hpp"
#include "cif++/CifUtils.hpp"
#include "cif++/Compound.hpp"
#include "cif++/Point.hpp"
namespace
ba
=
boost
::
algorithm
;
namespace
fs
=
std
::
filesystem
;
...
...
@@ -50,11 +50,41 @@ namespace mmcif
{
// --------------------------------------------------------------------
std
::
string
to_string
(
BondType
bondType
)
{
switch
(
bondType
)
{
case
BondType
:
:
sing
:
return
"sing"
;
case
BondType
:
:
doub
:
return
"doub"
;
case
BondType
:
:
trip
:
return
"trip"
;
case
BondType
:
:
quad
:
return
"quad"
;
case
BondType
:
:
arom
:
return
"arom"
;
case
BondType
:
:
poly
:
return
"poly"
;
case
BondType
:
:
delo
:
return
"delo"
;
case
BondType
:
:
pi
:
return
"pi"
;
}
}
BondType
from_string
(
const
std
::
string
&
bondType
)
{
if
(
cif
::
iequals
(
bondType
,
"sing"
))
return
BondType
::
sing
;
if
(
cif
::
iequals
(
bondType
,
"doub"
))
return
BondType
::
doub
;
if
(
cif
::
iequals
(
bondType
,
"trip"
))
return
BondType
::
trip
;
if
(
cif
::
iequals
(
bondType
,
"quad"
))
return
BondType
::
quad
;
if
(
cif
::
iequals
(
bondType
,
"arom"
))
return
BondType
::
arom
;
if
(
cif
::
iequals
(
bondType
,
"poly"
))
return
BondType
::
poly
;
if
(
cif
::
iequals
(
bondType
,
"delo"
))
return
BondType
::
delo
;
if
(
cif
::
iequals
(
bondType
,
"pi"
))
return
BondType
::
pi
;
throw
std
::
invalid_argument
(
"Invalid bondType: "
+
bondType
);
}
// --------------------------------------------------------------------
// Compound helper classes
struct
CompoundAtomLess
{
bool
operator
()(
const
CompoundAtom
&
a
,
const
CompoundAtom
&
b
)
const
bool
operator
()(
const
CompoundAtom
&
a
,
const
CompoundAtom
&
b
)
const
{
int
d
=
a
.
id
.
compare
(
b
.
id
);
if
(
d
==
0
)
...
...
@@ -65,512 +95,59 @@ struct CompoundAtomLess
struct
CompoundBondLess
{
bool
operator
()(
const
CompoundBond
&
a
,
const
CompoundBond
&
b
)
const
bool
operator
()(
const
CompoundBond
&
a
,
const
CompoundBond
&
b
)
const
{
int
d
=
a
.
atomID
[
0
].
compare
(
b
.
atomID
[
0
]);
if
(
d
==
0
)
d
=
a
.
atomID
[
1
].
compare
(
b
.
atomID
[
1
]);
if
(
d
==
0
)
d
=
a
.
type
-
b
.
type
;
d
=
static_cast
<
int
>
(
a
.
type
)
-
static_cast
<
int
>
(
b
.
type
)
;
return
d
<
0
;
}
};
// --------------------------------------------------------------------
// Isomers in the ccp4 dictionary
struct
IsomerSet
{
std
::
vector
<
std
::
string
>
compounds
;
};
struct
IsomerSets
{
std
::
vector
<
IsomerSet
>
isomers
;
};
class
IsomerDB
{
public
:
static
IsomerDB
&
instance
();
size_t
count
(
const
std
::
string
&
compound
)
const
;
const
std
::
vector
<
std
::
string
>&
operator
[](
const
std
::
string
&
compound
)
const
;
private
:
IsomerDB
();
IsomerSets
mData
;
};
IsomerDB
::
IsomerDB
()
{
#if defined(CACHE_DIR)
fs
::
path
isomersFile
=
fs
::
path
(
CACHE_DIR
)
/
"isomers.txt"
;
if
(
not
fs
::
exists
(
isomersFile
))
std
::
cerr
<<
"Could not locate isomers.txt in "
CACHE_DIR
<<
std
::
endl
;
else
{
std
::
ifstream
is
(
isomersFile
);
std
::
string
line
;
while
(
std
::
getline
(
is
,
line
))
{
IsomerSet
compounds
;
ba
::
split
(
compounds
.
compounds
,
line
,
ba
::
is_any_of
(
":"
));
if
(
not
compounds
.
compounds
.
empty
())
mData
.
isomers
.
emplace_back
(
std
::
move
(
compounds
));
}
}
#endif
}
IsomerDB
&
IsomerDB
::
instance
()
{
static
IsomerDB
sInstance
;
return
sInstance
;
}
size_t
IsomerDB
::
count
(
const
std
::
string
&
compound
)
const
{
size_t
n
=
0
;
for
(
auto
&
d
:
mData
.
isomers
)
{
if
(
find
(
d
.
compounds
.
begin
(),
d
.
compounds
.
end
(),
compound
)
!=
d
.
compounds
.
end
())
++
n
;
}
return
n
;
}
const
std
::
vector
<
std
::
string
>&
IsomerDB
::
operator
[](
const
std
::
string
&
compound
)
const
{
for
(
auto
&
d
:
mData
.
isomers
)
{
if
(
find
(
d
.
compounds
.
begin
(),
d
.
compounds
.
end
(),
compound
)
!=
d
.
compounds
.
end
())
return
d
.
compounds
;
}
throw
std
::
runtime_error
(
"No isomer set found containing "
+
compound
);
}
// --------------------------------------------------------------------
// Brute force comparison of two structures, when they are isomers the
// mapping between the atoms of both is returned
// This is not an optimal solution, but it works good enough for now
struct
Node
{
std
::
string
id
;
AtomType
symbol
;
std
::
vector
<
std
::
tuple
<
size_t
,
BondType
>>
links
;
size_t
hydrogens
=
0
;
};
// Check to see if the nodes a[iA] and b[iB] are the start of a similar sub structure
bool
SubStructuresAreIsomeric
(
const
std
::
vector
<
Node
>&
a
,
const
std
::
vector
<
Node
>&
b
,
size_t
iA
,
size_t
iB
,
std
::
vector
<
bool
>
visitedA
,
std
::
vector
<
bool
>
visitedB
,
std
::
vector
<
std
::
tuple
<
std
::
string
,
std
::
string
>>&
outMapping
)
{
auto
&
na
=
a
[
iA
];
auto
&
nb
=
b
[
iB
];
size_t
N
=
na
.
links
.
size
();
assert
(
na
.
symbol
==
nb
.
symbol
);
assert
(
nb
.
links
.
size
()
==
N
);
// we're optimistic today
bool
result
=
true
;
visitedA
[
iA
]
=
true
;
visitedB
[
iB
]
=
true
;
// we now have two sets of links to compare.
// Compare each permutation of the second set of indices with the first
std
::
vector
<
size_t
>
ilb
(
N
);
iota
(
ilb
.
begin
(),
ilb
.
end
(),
0
);
for
(;;)
{
result
=
true
;
std
::
vector
<
std
::
tuple
<
std
::
string
,
std
::
string
>>
m
;
for
(
size_t
i
=
0
;
result
and
i
<
N
;
++
i
)
{
size_t
lA
,
lB
;
BondType
typeA
,
typeB
;
std
::
tie
(
lA
,
typeA
)
=
na
.
links
[
i
];
assert
(
lA
<
a
.
size
());
std
::
tie
(
lB
,
typeB
)
=
nb
.
links
[
ilb
[
i
]];
assert
(
lB
<
b
.
size
());
if
(
typeA
!=
typeB
or
visitedA
[
lA
]
!=
visitedB
[
lB
])
{
result
=
false
;
break
;
}
auto
&
la
=
a
[
lA
];
auto
&
lb
=
b
[
lB
];
if
(
la
.
symbol
!=
lb
.
symbol
or
la
.
hydrogens
!=
lb
.
hydrogens
or
la
.
links
.
size
()
!=
lb
.
links
.
size
())
{
result
=
false
;
break
;
}
if
(
not
visitedA
[
lA
])
result
=
SubStructuresAreIsomeric
(
a
,
b
,
lA
,
lB
,
visitedA
,
visitedB
,
m
);
}
if
(
result
)
{
outMapping
.
insert
(
outMapping
.
end
(),
m
.
begin
(),
m
.
end
());
break
;
}
if
(
not
next_permutation
(
ilb
.
begin
(),
ilb
.
end
()))
break
;
}
if
(
result
and
na
.
id
!=
nb
.
id
)
outMapping
.
emplace_back
(
na
.
id
,
nb
.
id
);
return
result
;
}
bool
StructuresAreIsomeric
(
std
::
vector
<
CompoundAtom
>
atomsA
,
const
std
::
vector
<
CompoundBond
>&
bondsA
,
std
::
vector
<
CompoundAtom
>
atomsB
,
const
std
::
vector
<
CompoundBond
>&
bondsB
,
std
::
vector
<
std
::
tuple
<
std
::
string
,
std
::
string
>>&
outMapping
)
{
assert
(
atomsA
.
size
()
==
atomsB
.
size
());
assert
(
bondsA
.
size
()
==
bondsB
.
size
());
std
::
vector
<
Node
>
a
,
b
;
std
::
map
<
std
::
string
,
size_t
>
ma
,
mb
;
for
(
auto
&
atomA
:
atomsA
)
{
ma
[
atomA
.
id
]
=
a
.
size
();
a
.
push_back
({
atomA
.
id
,
atomA
.
typeSymbol
});
}
for
(
auto
&
bondA
:
bondsA
)
{
size_t
atom1
=
ma
.
at
(
bondA
.
atomID
[
0
]);
size_t
atom2
=
ma
.
at
(
bondA
.
atomID
[
1
]);
if
(
a
[
atom2
].
symbol
==
H
)
a
[
atom1
].
hydrogens
+=
1
;
else
a
[
atom1
].
links
.
emplace_back
(
atom2
,
bondA
.
type
);
if
(
a
[
atom1
].
symbol
==
H
)
a
[
atom2
].
hydrogens
+=
1
;
else
a
[
atom2
].
links
.
emplace_back
(
atom1
,
bondA
.
type
);
}
for
(
auto
&
atomB
:
atomsB
)
{
mb
[
atomB
.
id
]
=
b
.
size
();
b
.
push_back
({
atomB
.
id
,
atomB
.
typeSymbol
});
}
for
(
auto
&
bondB
:
bondsB
)
{
size_t
atom1
=
mb
.
at
(
bondB
.
atomID
[
0
]);
size_t
atom2
=
mb
.
at
(
bondB
.
atomID
[
1
]);
if
(
b
[
atom2
].
symbol
==
H
)
b
[
atom1
].
hydrogens
+=
1
;
else
b
[
atom1
].
links
.
emplace_back
(
atom2
,
bondB
.
type
);
if
(
b
[
atom1
].
symbol
==
H
)
b
[
atom2
].
hydrogens
+=
1
;
else
b
[
atom2
].
links
.
emplace_back
(
atom1
,
bondB
.
type
);
}
size_t
N
=
atomsA
.
size
();
size_t
ia
=
0
;
bool
result
=
false
;
// try each atom in B to see if it can be traced to be similar to A starting at zero
for
(
size_t
ib
=
0
;
ib
<
N
;
++
ib
)
{
if
(
b
[
ib
].
symbol
!=
a
[
ia
].
symbol
or
a
[
ia
].
hydrogens
!=
b
[
ib
].
hydrogens
or
a
[
ia
].
links
.
size
()
!=
b
[
ib
].
links
.
size
())
continue
;
std
::
vector
<
bool
>
va
(
N
,
false
),
vb
(
N
,
false
);
if
(
SubStructuresAreIsomeric
(
a
,
b
,
ia
,
ib
,
va
,
vb
,
outMapping
))
{
result
=
true
;
break
;
}
}
return
result
;
}
// --------------------------------------------------------------------
// Compound
Compound
::
Compound
(
const
std
::
string
&
file
,
const
std
::
string
&
id
,
const
std
::
string
&
name
,
const
std
::
string
&
group
)
:
mID
(
id
),
mName
(
name
),
mGroup
(
group
)
Compound
::
Compound
(
cif
::
Datablock
&
db
)
{
try
{
mCF
.
load
(
file
);
// locate the datablock
auto
&
db
=
mCF
[
"comp_"
+
id
];
auto
&
compoundAtoms
=
db
[
"chem_comp_atom"
];
for
(
auto
row
:
compoundAtoms
)
{
std
::
string
id
,
symbol
,
energy
;
float
charge
;
cif
::
tie
(
id
,
symbol
,
energy
,
charge
)
=
row
.
get
(
"atom_id"
,
"type_symbol"
,
"type_energy"
,
"partial_charge"
);
mAtoms
.
push_back
({
id
,
AtomTypeTraits
(
symbol
).
type
(),
energy
,
charge
});
}
sort
(
mAtoms
.
begin
(),
mAtoms
.
end
(),
CompoundAtomLess
());
auto
&
compBonds
=
db
[
"chem_comp_bond"
];
for
(
auto
row
:
compBonds
)
{
CompoundBond
b
;
std
::
string
type
,
aromatic
;
cif
::
tie
(
b
.
atomID
[
0
],
b
.
atomID
[
1
],
type
,
b
.
distance
,
b
.
esd
)
=
row
.
get
(
"atom_id_1"
,
"atom_id_2"
,
"type"
,
"value_dist"
,
"value_dist_esd"
);
using
cif
::
iequals
;
if
(
iequals
(
type
,
"single"
)
or
iequals
(
type
,
"sing"
))
b
.
type
=
singleBond
;
else
if
(
iequals
(
type
,
"double"
)
or
iequals
(
type
,
"doub"
))
b
.
type
=
doubleBond
;
else
if
(
iequals
(
type
,
"triple"
)
or
iequals
(
type
,
"trip"
))
b
.
type
=
tripleBond
;
else
if
(
iequals
(
type
,
"deloc"
)
or
iequals
(
type
,
"aromat"
)
or
iequals
(
type
,
"aromatic"
))
b
.
type
=
delocalizedBond
;
else
{
if
(
cif
::
VERBOSE
)
std
::
cerr
<<
"Unimplemented chem_comp_bond.type "
<<
type
<<
" in "
<<
id
<<
std
::
endl
;
b
.
type
=
singleBond
;
}
if
(
b
.
atomID
[
0
]
>
b
.
atomID
[
1
])
swap
(
b
.
atomID
[
0
],
b
.
atomID
[
1
]);
mBonds
.
push_back
(
b
);
}
sort
(
mBonds
.
begin
(),
mBonds
.
end
(),
CompoundBondLess
());
auto
&
chemComp
=
db
[
"chem_comp"
];
for
(
auto
row
:
db
[
"chem_comp_angle"
])
{
CompoundAngle
a
;
cif
::
tie
(
a
.
atomID
[
0
],
a
.
atomID
[
1
],
a
.
atomID
[
2
],
a
.
angle
,
a
.
esd
)
=
row
.
get
(
"atom_id_1"
,
"atom_id_2"
,
"atom_id_3"
,
"value_angle"
,
"value_angle_esd"
);
mAngles
.
push_back
(
a
);
}
if
(
chemComp
.
size
()
!=
1
)
throw
std
::
runtime_error
(
"Invalid compound file, chem_comp should contain a single row"
);
for
(
auto
row
:
db
[
"chem_comp_tor"
])
{
CompoundTorsion
a
;
cif
::
tie
(
a
.
atomID
[
0
],
a
.
atomID
[
1
],
a
.
atomID
[
2
],
a
.
atomID
[
3
],
a
.
angle
,
a
.
esd
,
a
.
period
)
=
row
.
get
(
"atom_id_1"
,
"atom_id_2"
,
"atom_id_3"
,
"atom_id_4"
,
"value_angle"
,
"value_angle_esd"
,
"period"
);
mTorsions
.
push_back
(
a
);
}
cif
::
tie
(
mID
,
mName
,
mType
,
mFormula
,
mFormulaWeight
,
mFormalCharge
)
=
chemComp
.
front
().
get
(
"id"
,
"name"
,
"type"
,
"formula"
,
"formula_weight"
,
"pdbx_formal_charge"
);
for
(
auto
row
:
db
[
"chem_comp_chir"
])
{
CompoundChiralCentre
cc
;
std
::
string
volumeSign
;
cif
::
tie
(
cc
.
id
,
cc
.
atomIDCentre
,
cc
.
atomID
[
0
],
cc
.
atomID
[
1
],
cc
.
atomID
[
2
],
volumeSign
)
=
row
.
get
(
"id"
,
"atom_id_centre"
,
"atom_id_1"
,
"atom_id_2"
,
"atom_id_3"
,
"volume_sign"
);
if
(
volumeSign
==
"negativ"
or
volumeSign
==
"negative"
)
cc
.
volumeSign
=
negativ
;
else
if
(
volumeSign
==
"positiv"
or
volumeSign
==
"positive"
)
cc
.
volumeSign
=
positiv
;
else
if
(
volumeSign
==
"both"
)
cc
.
volumeSign
=
both
;
else
{
if
(
cif
::
VERBOSE
)
std
::
cerr
<<
"Unimplemented chem_comp_chir.volume_sign "
<<
volumeSign
<<
" in "
<<
id
<<
std
::
endl
;
continue
;
}
mChiralCentres
.
push_back
(
cc
);
}
auto
&
compPlanes
=
db
[
"chem_comp_plane_atom"
];
for
(
auto
row
:
compPlanes
)
{
std
::
string
atom_id
,
plane_id
;
float
esd
;
cif
::
tie
(
atom_id
,
plane_id
,
esd
)
=
row
.
get
(
"atom_id"
,
"plane_id"
,
"dist_esd"
);
auto
i
=
find_if
(
mPlanes
.
begin
(),
mPlanes
.
end
(),
[
&
](
auto
&
p
)
{
return
p
.
id
==
plane_id
;});
if
(
i
==
mPlanes
.
end
())
mPlanes
.
emplace_back
(
CompoundPlane
{
plane_id
,
{
atom_id
},
esd
});
else
i
->
atomID
.
push_back
(
atom_id
);
}
}
catch
(
const
std
::
exception
&
ex
)
auto
&
chemCompAtom
=
db
[
"chem_comp_atom"
];
for
(
auto
row
:
chemCompAtom
)
{
std
::
cerr
<<
"Error loading ccp4 file for "
<<
id
<<
" from file "
<<
file
<<
std
::
endl
;
throw
;
CompoundAtom
atom
;
std
::
string
typeSymbol
;
cif
::
tie
(
atom
.
id
,
typeSymbol
,
atom
.
charge
,
atom
.
aromatic
,
atom
.
leavingAtom
,
atom
.
stereoConfig
,
atom
.
x
,
atom
.
y
,
atom
.
z
)
=
row
.
get
(
"id"
,
"type_symbol"
,
"charge"
,
"pdbx_aromatic_flag"
,
"pdbx_leaving_atom_flag"
,
"pdbx_stereo_config"
,
"model_Cartn_x"
,
"model_Cartn_y"
,
"model_Cartn_z"
);
atom
.
typeSymbol
=
AtomTypeTraits
(
typeSymbol
).
type
();
mAtoms
.
push_back
(
std
::
move
(
atom
));
}
}
std
::
string
Compound
::
formula
()
const
{
std
::
string
result
;
std
::
map
<
std
::
string
,
uint32_t
>
atoms
;
float
chargeSum
=
0
;
for
(
auto
r
:
mAtoms
)
{
atoms
[
AtomTypeTraits
(
r
.
typeSymbol
).
symbol
()]
+=
1
;
chargeSum
+=
r
.
partialCharge
;
}
auto
c
=
atoms
.
find
(
"C"
);
if
(
c
!=
atoms
.
end
())
{
result
=
"C"
;
if
(
c
->
second
>
1
)
result
+=
std
::
to_string
(
c
->
second
);
atoms
.
erase
(
c
);
auto
h
=
atoms
.
find
(
"H"
);
if
(
h
!=
atoms
.
end
())
{
result
+=
" H"
;
if
(
h
->
second
>
1
)
result
+=
std
::
to_string
(
h
->
second
);
atoms
.
erase
(
h
);
}
}
for
(
auto
a
:
atoms
)
auto
&
chemCompBond
=
db
[
"chem_comp_bond"
];
for
(
auto
row
:
chemCompBond
)
{
if
(
not
result
.
empty
())
result
+=
' '
;
result
+=
a
.
first
;
if
(
a
.
second
>
1
)
result
+=
std
::
to_string
(
a
.
second
);
CompoundBond
bond
;
std
::
string
valueOrder
;
cif
::
tie
(
bond
.
atomID
[
0
],
bond
.
atomID
[
1
],
valueOrder
,
bond
.
aromatic
,
bond
.
stereoConfig
)
=
row
.
get
(
"atom_id_1"
,
"atom_id_2"
,
"value_order"
,
"pdbx_aromatic_flag"
,
"pdbx_stereo_config"
)
;
bond
.
type
=
from_string
(
valueOrder
);
mBonds
.
push_back
(
std
::
move
(
bond
));
}
int
charge
=
lrint
(
chargeSum
);
if
(
charge
!=
0
)
result
+=
' '
+
std
::
to_string
(
charge
);
return
result
;
}
float
Compound
::
formulaWeight
()
const
{
float
result
=
0
;
for
(
auto
r
:
mAtoms
)
result
+=
AtomTypeTraits
(
r
.
typeSymbol
).
weight
();
return
result
;
}
int
Compound
::
charge
()
const
{
float
result
=
0
;
for
(
auto
r
:
mAtoms
)
result
+=
r
.
partialCharge
;
return
lrint
(
result
);
}
std
::
string
Compound
::
type
()
const
{
std
::
string
result
;
// known groups are (counted from ccp4 monomer dictionary)
// D-pyranose
// DNA
// L-PEPTIDE LINKING
// L-SACCHARIDE
// L-peptide
// L-pyranose
// M-peptide
// NON-POLYMER
// P-peptide
// RNA
// furanose
// non-polymer
// non_polymer
// peptide
// pyranose
// saccharide
if
(
cif
::
iequals
(
mID
,
"gly"
))
result
=
"peptide linking"
;
else
if
(
cif
::
iequals
(
mGroup
,
"l-peptide"
)
or
cif
::
iequals
(
mGroup
,
"L-peptide linking"
)
or
cif
::
iequals
(
mGroup
,
"peptide"
))
result
=
"L-peptide linking"
;
else
if
(
cif
::
iequals
(
mGroup
,
"DNA"
))
result
=
"DNA linking"
;
else
if
(
cif
::
iequals
(
mGroup
,
"RNA"
))
result
=
"RNA linking"
;
// else
// result = mGroup;
return
result
;
}
bool
Compound
::
isWater
()
const
{
return
mID
==
"HOH"
or
mID
==
"H2O"
;
}
bool
Compound
::
isSugar
()
const
{
return
cif
::
iequals
(
mGroup
,
"furanose"
)
or
cif
::
iequals
(
mGroup
,
"pyranose"
);
}
CompoundAtom
Compound
::
getAtomByID
(
const
std
::
string
&
atomID
)
const
CompoundAtom
Compound
::
getAtomByID
(
const
std
::
string
&
atomID
)
const
{
CompoundAtom
result
=
{};
for
(
auto
&
a
:
mAtoms
)
for
(
auto
&
a
:
mAtoms
)
{
if
(
a
.
id
==
atomID
)
{
...
...
@@ -579,13 +156,13 @@ CompoundAtom Compound::getAtomByID(const std::string& atomID) const
}
}
if
(
result
.
id
!=
atomID
)
if
(
result
.
id
!=
atomID
)
throw
std
::
out_of_range
(
"No atom "
+
atomID
+
" in Compound "
+
mID
);
return
result
;
}
const
Compound
*
Compound
::
create
(
const
std
::
string
&
id
)
const
Compound
*
Compound
::
create
(
const
std
::
string
&
id
)
{
auto
result
=
CompoundFactory
::
instance
().
get
(
id
);
if
(
result
==
nullptr
)
...
...
@@ -593,149 +170,44 @@ const Compound* Compound::create(const std::string& id)
return
result
;
}
bool
Compound
::
atomsBonded
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
bool
Compound
::
atomsBonded
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
{
auto
i
=
find_if
(
mBonds
.
begin
(),
mBonds
.
end
(),
[
&
](
const
CompoundBond
&
b
)
{
return
(
b
.
atomID
[
0
]
==
atomId_1
and
b
.
atomID
[
1
]
==
atomId_2
)
or
(
b
.
atomID
[
0
]
==
atomId_2
and
b
.
atomID
[
1
]
==
atomId_1
);
[
&
](
const
CompoundBond
&
b
)
{
return
(
b
.
atomID
[
0
]
==
atomId_1
and
b
.
atomID
[
1
]
==
atomId_2
)
or
(
b
.
atomID
[
0
]
==
atomId_2
and
b
.
atomID
[
1
]
==
atomId_1
);
});
return
i
!=
mBonds
.
end
();
}
float
Compound
::
atomBondValue
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
{
auto
i
=
find_if
(
mBonds
.
begin
(),
mBonds
.
end
(),
[
&
](
const
CompoundBond
&
b
)
{
return
(
b
.
atomID
[
0
]
==
atomId_1
and
b
.
atomID
[
1
]
==
atomId_2
)
or
(
b
.
atomID
[
0
]
==
atomId_2
and
b
.
atomID
[
1
]
==
atomId_1
);
});
return
i
!=
mBonds
.
end
()
?
i
->
distance
:
0
;
}
// float Compound::atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const
// {
// auto i = find_if(mBonds.begin(), mBonds.end(),
// [&](const CompoundBond &b) {
// return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
// });
bool
Compound
::
isIsomerOf
(
const
Compound
&
c
)
const
{
bool
result
=
false
;
for
(;;)
{
// easy tests first
if
(
mID
==
c
.
mID
)
{
result
=
true
;
break
;
}
if
(
mAtoms
.
size
()
!=
c
.
mAtoms
.
size
())
break
;
if
(
mBonds
.
size
()
!=
c
.
mBonds
.
size
())
break
;
if
(
mChiralCentres
.
size
()
!=
c
.
mChiralCentres
.
size
())
break
;
// return i != mBonds.end() ? i->distance : 0;
// }
// same number of atoms of each type?
std
::
map
<
AtomType
,
int
>
aTypeCount
,
bTypeCount
;
bool
sameAtomNames
=
true
;
for
(
size_t
i
=
0
;
i
<
mAtoms
.
size
();
++
i
)
{
auto
&
a
=
mAtoms
[
i
];
auto
&
b
=
c
.
mAtoms
[
i
];
aTypeCount
[
a
.
typeSymbol
]
+=
1
;
bTypeCount
[
b
.
typeSymbol
]
+=
1
;
if
(
a
.
id
!=
b
.
id
or
a
.
typeSymbol
!=
b
.
typeSymbol
)
sameAtomNames
=
false
;
}
if
(
not
sameAtomNames
and
aTypeCount
!=
bTypeCount
)
break
;
bool
sameBonds
=
sameAtomNames
;
for
(
size_t
i
=
0
;
sameBonds
and
i
<
mBonds
.
size
();
++
i
)
{
sameBonds
=
mBonds
[
i
].
atomID
[
0
]
==
c
.
mBonds
[
i
].
atomID
[
0
]
and
mBonds
[
i
].
atomID
[
1
]
==
c
.
mBonds
[
i
].
atomID
[
1
]
and
mBonds
[
i
].
type
==
c
.
mBonds
[
i
].
type
;
}
if
(
sameBonds
)
{
result
=
true
;
break
;
}
// implement rest of tests
std
::
vector
<
std
::
tuple
<
std
::
string
,
std
::
string
>>
mapping
;
result
=
StructuresAreIsomeric
(
mAtoms
,
mBonds
,
c
.
mAtoms
,
c
.
mBonds
,
mapping
);
if
(
cif
::
VERBOSE
and
result
)
{
for
(
auto
&
m
:
mapping
)
std
::
cerr
<<
" "
<<
std
::
get
<
0
>
(
m
)
<<
" => "
<<
std
::
get
<
1
>
(
m
)
<<
std
::
endl
;
}
break
;
}
return
result
;
}
std
::
vector
<
std
::
tuple
<
std
::
string
,
std
::
string
>>
Compound
::
mapToIsomer
(
const
Compound
&
c
)
const
{
std
::
vector
<
std
::
tuple
<
std
::
string
,
std
::
string
>>
result
;
bool
check
=
StructuresAreIsomeric
(
mAtoms
,
mBonds
,
c
.
mAtoms
,
c
.
mBonds
,
result
);
if
(
not
check
)
throw
std
::
runtime_error
(
"Compounds "
+
id
()
+
" and "
+
c
.
id
()
+
" are not isomers in call to mapToIsomer"
);
return
result
;
}
// float Compound::bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const
// {
// float result = nanf("1");
std
::
vector
<
std
::
string
>
Compound
::
isomers
()
const
{
std
::
vector
<
std
::
string
>
result
;
auto
&
db
=
IsomerDB
::
instance
();
if
(
db
.
count
(
mID
))
{
result
=
db
[
mID
];
auto
i
=
find
(
result
.
begin
(),
result
.
end
(),
mID
);
assert
(
i
!=
result
.
end
());
result
.
erase
(
i
);
}
return
result
;
}
// for (auto &a : mAngles)
// {
// if (not(a.atomID[1] == atomId_2 and
// ((a.atomID[0] == atomId_1 and a.atomID[2] == atomId_3) or
// (a.atomID[2] == atomId_1 and a.atomID[0] == atomId_3))))
// continue;
float
Compound
::
bondAngle
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
,
const
std
::
string
&
atomId_3
)
const
{
float
result
=
nanf
(
"1"
);
for
(
auto
&
a
:
mAngles
)
{
if
(
not
(
a
.
atomID
[
1
]
==
atomId_2
and
((
a
.
atomID
[
0
]
==
atomId_1
and
a
.
atomID
[
2
]
==
atomId_3
)
or
(
a
.
atomID
[
2
]
==
atomId_1
and
a
.
atomID
[
0
]
==
atomId_3
))))
continue
;
result
=
a
.
angle
;
break
;
}
return
result
;
}
// result = a.angle;
// break;
// }
// return result;
// }
//static float calcC(float a, float b, float alpha)
//{
...
...
@@ -743,288 +215,85 @@ float Compound::bondAngle(const std::string& atomId_1, const std::string& atomId
// float d = sqrt(b * b - f * f);
// float e = a - d;
// float c = sqrt(f * f + e * e);
//
//
// return c;
//}
float
Compound
::
chiralVolume
(
const
std
::
string
&
centreID
)
const
{
float
result
=
0
;
for
(
auto
&
cv
:
mChiralCentres
)
{
if
(
cv
.
id
!=
centreID
)
continue
;
// calculate the expected chiral volume
// the edges
float
a
=
atomBondValue
(
cv
.
atomIDCentre
,
cv
.
atomID
[
0
]);
float
b
=
atomBondValue
(
cv
.
atomIDCentre
,
cv
.
atomID
[
1
]);
float
c
=
atomBondValue
(
cv
.
atomIDCentre
,
cv
.
atomID
[
2
]);
// the angles for the top of the tetrahedron
float
alpha
=
bondAngle
(
cv
.
atomID
[
0
],
cv
.
atomIDCentre
,
cv
.
atomID
[
1
]);
float
beta
=
bondAngle
(
cv
.
atomID
[
1
],
cv
.
atomIDCentre
,
cv
.
atomID
[
2
]);
float
gamma
=
bondAngle
(
cv
.
atomID
[
2
],
cv
.
atomIDCentre
,
cv
.
atomID
[
0
]);
float
cosa
=
cos
(
alpha
*
kPI
/
180
);
float
cosb
=
cos
(
beta
*
kPI
/
180
);
float
cosc
=
cos
(
gamma
*
kPI
/
180
);
result
=
(
a
*
b
*
c
*
sqrt
(
1
+
2
*
cosa
*
cosb
*
cosc
-
(
cosa
*
cosa
)
-
(
cosb
*
cosb
)
-
(
cosc
*
cosc
)))
/
6
;
if
(
cv
.
volumeSign
==
negativ
)
result
=
-
result
;
break
;
}
return
result
;
}
// float Compound::chiralVolume(const std::string ¢reID) const
// {
// float result = 0;
// --------------------------------------------------------------------
// for (auto &cv : mChiralCentres)
// {
// if (cv.id != centreID)
// continue;
Link
::
Link
(
cif
::
Datablock
&
db
)
{
mID
=
db
.
getName
();
auto
&
linkBonds
=
db
[
"chem_link_bond"
];
for
(
auto
row
:
linkBonds
)
{
LinkBond
b
;
std
::
string
type
,
aromatic
;
cif
::
tie
(
b
.
atom
[
0
].
compID
,
b
.
atom
[
0
].
atomID
,
b
.
atom
[
1
].
compID
,
b
.
atom
[
1
].
atomID
,
type
,
b
.
distance
,
b
.
esd
)
=
row
.
get
(
"atom_1_comp_id"
,
"atom_id_1"
,
"atom_2_comp_id"
,
"atom_id_2"
,
"type"
,
"value_dist"
,
"value_dist_esd"
);
using
cif
::
iequals
;
if
(
iequals
(
type
,
"single"
)
or
iequals
(
type
,
"sing"
))
b
.
type
=
singleBond
;
else
if
(
iequals
(
type
,
"double"
)
or
iequals
(
type
,
"doub"
))
b
.
type
=
doubleBond
;
else
if
(
iequals
(
type
,
"triple"
)
or
iequals
(
type
,
"trip"
))
b
.
type
=
tripleBond
;
else
if
(
iequals
(
type
,
"deloc"
)
or
iequals
(
type
,
"aromat"
)
or
iequals
(
type
,
"aromatic"
))
b
.
type
=
delocalizedBond
;
else
{
if
(
cif
::
VERBOSE
)
std
::
cerr
<<
"Unimplemented chem_link_bond.type "
<<
type
<<
" in "
<<
mID
<<
std
::
endl
;
b
.
type
=
singleBond
;
}
// if (b.atom[0] > b.atom[1])
// swap(b.atom[0], b.atom[1]);
mBonds
.
push_back
(
b
);
}
// sort(mBonds.begin(), mBonds.end(), LinkBondLess());
// // calculate the expected chiral volume
auto
&
linkAngles
=
db
[
"chem_link_angle"
];
for
(
auto
row
:
linkAngles
)
{
LinkAngle
a
;
cif
::
tie
(
a
.
atom
[
0
].
compID
,
a
.
atom
[
0
].
atomID
,
a
.
atom
[
1
].
compID
,
a
.
atom
[
1
].
atomID
,
a
.
atom
[
2
].
compID
,
a
.
atom
[
2
].
atomID
,
a
.
angle
,
a
.
esd
)
=
row
.
get
(
"atom_1_comp_id"
,
"atom_id_1"
,
"atom_2_comp_id"
,
"atom_id_2"
,
"atom_3_comp_id"
,
"atom_id_3"
,
"value_angle"
,
"value_angle_esd"
);
mAngles
.
push_back
(
a
);
}
// // the edges
for
(
auto
row
:
db
[
"chem_link_tor"
])
{
LinkTorsion
a
;
cif
::
tie
(
a
.
atom
[
0
].
compID
,
a
.
atom
[
0
].
atomID
,
a
.
atom
[
1
].
compID
,
a
.
atom
[
1
].
atomID
,
a
.
atom
[
2
].
compID
,
a
.
atom
[
2
].
atomID
,
a
.
atom
[
3
].
compID
,
a
.
atom
[
3
].
atomID
,
a
.
angle
,
a
.
esd
,
a
.
period
)
=
row
.
get
(
"atom_1_comp_id"
,
"atom_id_1"
,
"atom_2_comp_id"
,
"atom_id_2"
,
"atom_3_comp_id"
,
"atom_id_3"
,
"atom_4_comp_id"
,
"atom_id_4"
,
"value_angle"
,
"value_angle_esd"
,
"period"
);
mTorsions
.
push_back
(
a
);
}
// float a = atomBondValue(cv.atomIDCentre, cv.atomID[0]);
// float b = atomBondValue(cv.atomIDCentre, cv.atomID[1]);
// float c = atomBondValue(cv.atomIDCentre, cv.atomID[2]);
auto
&
linkChir
=
db
[
"chem_link_chir"
];
for
(
auto
row
:
linkChir
)
{
LinkChiralCentre
cc
;
std
::
string
volumeSign
;
cif
::
tie
(
cc
.
id
,
cc
.
atomCentre
.
compID
,
cc
.
atomCentre
.
atomID
,
cc
.
atom
[
0
].
compID
,
cc
.
atom
[
0
].
atomID
,
cc
.
atom
[
1
].
compID
,
cc
.
atom
[
1
].
atomID
,
cc
.
atom
[
2
].
compID
,
cc
.
atom
[
2
].
atomID
,
volumeSign
)
=
row
.
get
(
"id"
,
"atom_centre_comp_id"
,
"atom_id_centre"
,
"atom_1_comp_id"
,
"atom_id_1"
,
"atom_2_comp_id"
,
"atom_id_2"
,
"atom_3_comp_id"
,
"atom_id_3"
,
"volume_sign"
);
if
(
volumeSign
==
"negativ"
or
volumeSign
==
"negative"
)
cc
.
volumeSign
=
negativ
;
else
if
(
volumeSign
==
"positiv"
or
volumeSign
==
"positive"
)
cc
.
volumeSign
=
positiv
;
else
if
(
volumeSign
==
"both"
)
cc
.
volumeSign
=
both
;
else
{
if
(
cif
::
VERBOSE
)
std
::
cerr
<<
"Unimplemented chem_link_chir.volume_sign "
<<
volumeSign
<<
" in "
<<
mID
<<
std
::
endl
;
continue
;
}
mChiralCentres
.
push_back
(
cc
);
}
// // the angles for the top of the tetrahedron
auto
&
linkPlanes
=
db
[
"chem_link_plane"
];
for
(
auto
row
:
linkPlanes
)
{
int
compID
;
std
::
string
atomID
,
planeID
;
float
esd
;
cif
::
tie
(
planeID
,
compID
,
atomID
,
esd
)
=
row
.
get
(
"plane_id"
,
"atom_comp_id"
,
"atom_id"
,
"dist_esd"
);
auto
i
=
find_if
(
mPlanes
.
begin
(),
mPlanes
.
end
(),
[
&
](
auto
&
p
)
{
return
p
.
id
==
planeID
;});
if
(
i
==
mPlanes
.
end
())
{
std
::
vector
<
LinkAtom
>
atoms
{
LinkAtom
{
compID
,
atomID
}};
mPlanes
.
emplace_back
(
LinkPlane
{
planeID
,
move
(
atoms
),
esd
});
}
else
i
->
atoms
.
push_back
({
compID
,
atomID
});
}
}
// float alpha = bondAngle(cv.atomID[0], cv.atomIDCentre, cv.atomID[1]);
// float beta = bondAngle(cv.atomID[1], cv.atomIDCentre, cv.atomID[2]);
// float gamma = bondAngle(cv.atomID[2], cv.atomIDCentre, cv.atomID[0]);
const
Link
&
Link
::
create
(
const
std
::
string
&
id
)
{
auto
result
=
CompoundFactory
::
instance
().
getLink
(
id
);
if
(
result
==
nullptr
)
result
=
CompoundFactory
::
instance
().
createLink
(
id
);
if
(
result
==
nullptr
)
throw
std
::
runtime_error
(
"Link with id "
+
id
+
" not found"
);
return
*
result
;
}
// float cosa = cos(alpha * kPI / 180);
// float cosb = cos(beta * kPI / 180);
// float cosc = cos(gamma * kPI / 180);
Link
::~
Link
()
{
}
// result = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;
float
Link
::
atomBondValue
(
const
LinkAtom
&
atom1
,
const
LinkAtom
&
atom2
)
const
{
auto
i
=
find_if
(
mBonds
.
begin
(),
mBonds
.
end
(),
[
&
](
auto
&
b
)
{
return
(
b
.
atom
[
0
]
==
atom1
and
b
.
atom
[
1
]
==
atom2
)
or
(
b
.
atom
[
0
]
==
atom2
and
b
.
atom
[
1
]
==
atom1
);
});
return
i
!=
mBonds
.
end
()
?
i
->
distance
:
0
;
}
// if (cv.volumeSign == negativ)
// result = -result;
float
Link
::
bondAngle
(
const
LinkAtom
&
atom1
,
const
LinkAtom
&
atom2
,
const
LinkAtom
&
atom3
)
const
{
float
result
=
nanf
(
"1"
);
for
(
auto
&
a
:
mAngles
)
{
if
(
not
(
a
.
atom
[
1
]
==
atom2
and
((
a
.
atom
[
0
]
==
atom1
and
a
.
atom
[
2
]
==
atom3
)
or
(
a
.
atom
[
2
]
==
atom1
and
a
.
atom
[
0
]
==
atom3
))))
continue
;
result
=
a
.
angle
;
break
;
}
return
result
;
}
// break;
// }
float
Link
::
chiralVolume
(
const
std
::
string
&
centreID
)
const
{
float
result
=
0
;
for
(
auto
&
cv
:
mChiralCentres
)
{
if
(
cv
.
id
!=
centreID
)
continue
;
// calculate the expected chiral volume
// the edges
float
a
=
atomBondValue
(
cv
.
atomCentre
,
cv
.
atom
[
0
]);
float
b
=
atomBondValue
(
cv
.
atomCentre
,
cv
.
atom
[
1
]);
float
c
=
atomBondValue
(
cv
.
atomCentre
,
cv
.
atom
[
2
]);
// the angles for the top of the tetrahedron
float
alpha
=
bondAngle
(
cv
.
atom
[
0
],
cv
.
atomCentre
,
cv
.
atom
[
1
]);
float
beta
=
bondAngle
(
cv
.
atom
[
1
],
cv
.
atomCentre
,
cv
.
atom
[
2
]);
float
gamma
=
bondAngle
(
cv
.
atom
[
2
],
cv
.
atomCentre
,
cv
.
atom
[
0
]);
float
cosa
=
cos
(
alpha
*
kPI
/
180
);
float
cosb
=
cos
(
beta
*
kPI
/
180
);
float
cosc
=
cos
(
gamma
*
kPI
/
180
);
result
=
(
a
*
b
*
c
*
sqrt
(
1
+
2
*
cosa
*
cosb
*
cosc
-
(
cosa
*
cosa
)
-
(
cosb
*
cosb
)
-
(
cosc
*
cosc
)))
/
6
;
if
(
cv
.
volumeSign
==
negativ
)
result
=
-
result
;
break
;
}
return
result
;
}
// return result;
// }
// --------------------------------------------------------------------
// a factory class to generate compounds
const
std
::
map
<
std
::
string
,
char
>
kAAMap
{
{
"ALA"
,
'A'
},
{
"ARG"
,
'R'
},
{
"ASN"
,
'N'
},
{
"ASP"
,
'D'
},
{
"CYS"
,
'C'
},
{
"GLN"
,
'Q'
},
{
"GLU"
,
'E'
},
{
"GLY"
,
'G'
},
{
"HIS"
,
'H'
},
{
"ILE"
,
'I'
},
{
"LEU"
,
'L'
},
{
"LYS"
,
'K'
},
{
"MET"
,
'M'
},
{
"PHE"
,
'F'
},
{
"PRO"
,
'P'
},
{
"SER"
,
'S'
},
{
"THR"
,
'T'
},
{
"TRP"
,
'W'
},
{
"TYR"
,
'Y'
},
{
"VAL"
,
'V'
},
{
"GLX"
,
'Z'
},
{
"ASX"
,
'B'
}
};
const
std
::
map
<
std
::
string
,
char
>
kBaseMap
{
{
"A"
,
'A'
},
{
"C"
,
'C'
},
{
"G"
,
'G'
},
{
"T"
,
'T'
},
{
"U"
,
'U'
},
{
"DA"
,
'A'
},
{
"DC"
,
'C'
},
{
"DG"
,
'G'
},
{
"DT"
,
'T'
}
};
const
std
::
map
<
std
::
string
,
char
>
kAAMap
{
{
"ALA"
,
'A'
},
{
"ARG"
,
'R'
},
{
"ASN"
,
'N'
},
{
"ASP"
,
'D'
},
{
"CYS"
,
'C'
},
{
"GLN"
,
'Q'
},
{
"GLU"
,
'E'
},
{
"GLY"
,
'G'
},
{
"HIS"
,
'H'
},
{
"ILE"
,
'I'
},
{
"LEU"
,
'L'
},
{
"LYS"
,
'K'
},
{
"MET"
,
'M'
},
{
"PHE"
,
'F'
},
{
"PRO"
,
'P'
},
{
"SER"
,
'S'
},
{
"THR"
,
'T'
},
{
"TRP"
,
'W'
},
{
"TYR"
,
'Y'
},
{
"VAL"
,
'V'
},
{
"GLX"
,
'Z'
},
{
"ASX"
,
'B'
}};
const
std
::
map
<
std
::
string
,
char
>
kBaseMap
{
{
"A"
,
'A'
},
{
"C"
,
'C'
},
{
"G"
,
'G'
},
{
"T"
,
'T'
},
{
"U"
,
'U'
},
{
"DA"
,
'A'
},
{
"DC"
,
'C'
},
{
"DG"
,
'G'
},
{
"DT"
,
'T'
}};
// --------------------------------------------------------------------
...
...
@@ -1033,20 +302,17 @@ class CompoundFactoryImpl
public
:
CompoundFactoryImpl
();
CompoundFactoryImpl
(
const
std
::
string
&
file
,
CompoundFactoryImpl
*
next
);
CompoundFactoryImpl
(
const
std
::
string
&
file
,
CompoundFactoryImpl
*
next
);
~
CompoundFactoryImpl
()
{
delete
mNext
;
}
Compound
*
get
(
std
::
string
id
);
Compound
*
create
(
std
::
string
id
);
Compound
*
get
(
std
::
string
id
);
Compound
*
create
(
std
::
string
id
);
const
Link
*
getLink
(
std
::
string
id
);
const
Link
*
createLink
(
std
::
string
id
);
CompoundFactoryImpl
*
pop
()
CompoundFactoryImpl
*
pop
()
{
auto
result
=
mNext
;
mNext
=
nullptr
;
...
...
@@ -1054,93 +320,65 @@ class CompoundFactoryImpl
return
result
;
}
std
::
string
unalias
(
const
std
::
string
&
resName
)
const
{
std
::
string
result
=
resName
;
auto
&
e
=
const_cast
<
cif
::
File
&>
(
mFile
)[
"comp_synonym_list"
];
for
(
auto
&
synonym
:
e
[
"chem_comp_synonyms"
])
{
if
(
ba
::
iequals
(
synonym
[
"comp_alternative_id"
].
as
<
std
::
string
>
(),
resName
)
==
false
)
continue
;
result
=
synonym
[
"comp_id"
].
as
<
std
::
string
>
();
ba
::
trim
(
result
);
break
;
}
if
(
result
.
empty
()
and
mNext
)
result
=
mNext
->
unalias
(
resName
);
return
result
;
}
bool
isKnownPeptide
(
const
std
::
string
&
resName
)
bool
isKnownPeptide
(
const
std
::
string
&
resName
)
{
return
mKnownPeptides
.
count
(
resName
)
or
(
mNext
!=
nullptr
and
mNext
->
isKnownPeptide
(
resName
));
(
mNext
!=
nullptr
and
mNext
->
isKnownPeptide
(
resName
));
}
bool
isKnownBase
(
const
std
::
string
&
resName
)
bool
isKnownBase
(
const
std
::
string
&
resName
)
{
return
mKnownBases
.
count
(
resName
)
or
(
mNext
!=
nullptr
and
mNext
->
isKnownBase
(
resName
));
(
mNext
!=
nullptr
and
mNext
->
isKnownBase
(
resName
));
}
private
:
std
::
shared_timed_mutex
mMutex
;
std
::
string
mPath
;
std
::
vector
<
Compound
*>
mCompounds
;
std
::
vector
<
const
Link
*>
mLinks
;
std
::
set
<
std
::
string
>
mKnownPeptides
;
std
::
set
<
std
::
string
>
mKnownBases
;
std
::
set
<
std
::
string
>
mMissing
;
cif
::
File
mFile
;
CompoundFactoryImpl
*
mNext
=
nullptr
;
std
::
shared_timed_mutex
mMutex
;
std
::
string
mPath
;
std
::
vector
<
Compound
*>
mCompounds
;
std
::
set
<
std
::
string
>
mKnownPeptides
;
std
::
set
<
std
::
string
>
mKnownBases
;
std
::
set
<
std
::
string
>
mMissing
;
CompoundFactoryImpl
*
mNext
=
nullptr
;
};
// --------------------------------------------------------------------
CompoundFactoryImpl
::
CompoundFactoryImpl
()
{
for
(
const
auto
&
[
key
,
value
]
:
kAAMap
)
for
(
const
auto
&
[
key
,
value
]
:
kAAMap
)
mKnownPeptides
.
insert
(
key
);
for
(
const
auto
&
[
key
,
value
]
:
kBaseMap
)
for
(
const
auto
&
[
key
,
value
]
:
kBaseMap
)
mKnownBases
.
insert
(
key
);
}
CompoundFactoryImpl
::
CompoundFactoryImpl
(
const
std
::
string
&
file
,
CompoundFactoryImpl
*
next
)
:
mPath
(
file
),
mFile
(
file
),
mNext
(
next
)
CompoundFactoryImpl
::
CompoundFactoryImpl
(
const
std
::
string
&
file
,
CompoundFactoryImpl
*
next
)
:
mPath
(
file
)
,
mNext
(
next
)
{
const
std
::
regex
peptideRx
(
"(?:[lmp]-)?peptide"
,
std
::
regex
::
icase
);
cif
::
File
cifFile
(
file
);
if
(
not
cifFile
.
isValid
())
throw
std
::
runtime_error
(
"Invalid compound file"
);
auto
&
cat
=
mFile
.
firstDatablock
()[
"chem_comp"
];
for
(
auto
&
chemComp
:
cat
)
for
(
auto
&
db
:
cifFile
)
{
std
::
string
group
,
threeLetterCode
;
cif
::
tie
(
group
,
threeLetterCode
)
=
chemComp
.
get
(
"group"
,
"three_letter_code"
);
if
(
std
::
regex_match
(
group
,
peptideRx
))
mKnownPeptides
.
insert
(
threeLetterCode
);
else
if
(
ba
::
iequals
(
group
,
"DNA"
)
or
ba
::
iequals
(
group
,
"RNA"
))
mKnownBases
.
insert
(
threeLetterCode
);
auto
compound
=
std
::
make_unique
<
Compound
>
(
db
);
mCompounds
.
push_back
(
compound
.
release
());
}
}
Compound
*
CompoundFactoryImpl
::
get
(
std
::
string
id
)
Compound
*
CompoundFactoryImpl
::
get
(
std
::
string
id
)
{
std
::
shared_lock
lock
(
mMutex
);
ba
::
to_upper
(
id
);
Compound
*
result
=
nullptr
;
for
(
auto
cmp
:
mCompounds
)
Compound
*
result
=
nullptr
;
for
(
auto
cmp
:
mCompounds
)
{
if
(
cmp
->
id
()
==
id
)
{
...
...
@@ -1148,119 +386,72 @@ Compound* CompoundFactoryImpl::get(std::string id)
break
;
}
}
if
(
result
==
nullptr
and
mNext
!=
nullptr
)
result
=
mNext
->
get
(
id
);
return
result
;
}
Compound
*
CompoundFactoryImpl
::
create
(
std
::
string
id
)
Compound
*
CompoundFactoryImpl
::
create
(
std
::
string
id
)
{
ba
::
to_upper
(
id
);
Compound
*
result
=
get
(
id
);
if
(
result
==
nullptr
and
mMissing
.
count
(
id
)
==
0
and
not
mFile
.
empty
())
{
std
::
unique_lock
lock
(
mMutex
);
Compound
*
result
=
get
(
id
);
//
if (result == nullptr and mMissing.count(id) == 0 and not mFile.empty())
//
{
//
std::unique_lock lock(mMutex);
auto
&
cat
=
mFile
.
firstDatablock
()[
"chem_comp"
];
// auto &
cat = mFile.firstDatablock()["chem_comp"];
auto
rs
=
cat
.
find
(
cif
::
Key
(
"three_letter_code"
)
==
id
);
if
(
not
rs
.
empty
())
{
auto
row
=
rs
.
front
();
std
::
string
name
,
group
;
uint32_t
numberAtomsAll
,
numberAtomsNh
;
cif
::
tie
(
name
,
group
,
numberAtomsAll
,
numberAtomsNh
)
=
row
.
get
(
"name"
,
"group"
,
"number_atoms_all"
,
"number_atoms_nh"
);
ba
::
trim
(
name
);
ba
::
trim
(
group
);
if
(
mFile
.
get
(
"comp_"
+
id
)
==
nullptr
)
{
auto
clibd_mon
=
fs
::
path
(
getenv
(
"CLIBD_MON"
));
fs
::
path
resFile
=
clibd_mon
/
ba
::
to_lower_copy
(
id
.
substr
(
0
,
1
))
/
(
id
+
".cif"
);
if
(
not
fs
::
exists
(
resFile
)
and
(
id
==
"COM"
or
id
==
"CON"
or
"PRN"
))
// seriously...
resFile
=
clibd_mon
/
ba
::
to_lower_copy
(
id
.
substr
(
0
,
1
))
/
(
id
+
'_'
+
id
+
".cif"
);
if
(
not
fs
::
exists
(
resFile
))
mMissing
.
insert
(
id
);
else
{
mCompounds
.
push_back
(
new
Compound
(
resFile
.
string
(),
id
,
name
,
group
));
result
=
mCompounds
.
back
();
}
}
else
{
mCompounds
.
push_back
(
new
Compound
(
mPath
,
id
,
name
,
group
));
result
=
mCompounds
.
back
();
}
}
if
(
result
==
nullptr
and
mNext
!=
nullptr
)
result
=
mNext
->
create
(
id
);
}
return
result
;
}
// auto rs = cat.find(cif::Key("three_letter_code") == id);
const
Link
*
CompoundFactoryImpl
::
getLink
(
std
::
string
id
)
{
std
::
shared_lock
lock
(
mMutex
);
// if (not rs.empty()
)
//
{
// auto row = rs.front(
);
ba
::
to_upper
(
id
);
// std::string name, group;
// uint32_t numberAtomsAll, numberAtomsNh;
// cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
// row.get("name", "group", "number_atoms_all", "number_atoms_nh");
const
Link
*
result
=
nullptr
;
for
(
auto
link
:
mLinks
)
{
if
(
link
->
id
()
==
id
)
{
result
=
link
;
break
;
}
}
if
(
result
==
nullptr
and
mNext
!=
nullptr
)
result
=
mNext
->
getLink
(
id
);
return
result
;
}
// ba::trim(name);
// ba::trim(group);
const
Link
*
CompoundFactoryImpl
::
createLink
(
std
::
string
id
)
{
ba
::
to_upper
(
id
);
// if (mFile.get("comp_" + id) == nullptr
)
//
{
// auto clibd_mon = fs::path(getenv("CLIBD_MON")
);
const
Link
*
result
=
getLink
(
id
);
// fs::path resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif"
);
if
(
result
==
nullptr
)
{
std
::
unique_lock
lock
(
mMutex
);
// if (not fs::exists(resFile) and (id == "COM" or id == "CON" or "PRN")) // seriously...
// resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
// if (not fs::exists(resFile))
// mMissing.insert(id);
// else
// {
// mCompounds.push_back(new Compound(resFile.string(), id, name, group));
// result = mCompounds.back();
// }
// }
// else
// {
// mCompounds.push_back(new Compound(mPath, id, name, group));
// result = mCompounds.back();
// }
// }
// if (result == nullptr and mNext != nullptr)
// result = mNext->create(id);
// }
auto
db
=
mFile
.
get
(
"link_"
+
id
);
if
(
db
!=
nullptr
)
{
result
=
new
Link
(
*
db
);
mLinks
.
push_back
(
result
);
}
if
(
result
==
nullptr
and
mNext
!=
nullptr
)
result
=
mNext
->
createLink
(
id
);
}
return
result
;
}
// --------------------------------------------------------------------
CompoundFactory
*
CompoundFactory
::
sInstance
;
CompoundFactory
*
CompoundFactory
::
sInstance
;
thread_local
std
::
unique_ptr
<
CompoundFactory
>
CompoundFactory
::
tlInstance
;
bool
CompoundFactory
::
sUseThreadLocalInstance
;
...
...
@@ -1272,21 +463,6 @@ void CompoundFactory::init(bool useThreadLocalInstanceOnly)
CompoundFactory
::
CompoundFactory
()
:
mImpl
(
nullptr
)
{
const
char
*
clibdMon
=
getenv
(
"CLIBD_MON"
);
if
(
clibdMon
!=
nullptr
)
{
fs
::
path
db
=
fs
::
path
(
clibdMon
)
/
"list"
/
"mon_lib_list.cif"
;
if
(
fs
::
exists
(
db
))
pushDictionary
(
db
.
string
());
}
if
(
mImpl
==
nullptr
)
{
if
(
cif
::
VERBOSE
)
std
::
cerr
<<
"Could not load the mon_lib_list.cif file from CCP4, please make sure you have installed CCP4 and sourced the environment."
<<
std
::
endl
;
mImpl
=
new
CompoundFactoryImpl
();
}
}
CompoundFactory
::~
CompoundFactory
()
...
...
@@ -1294,7 +470,7 @@ CompoundFactory::~CompoundFactory()
delete
mImpl
;
}
CompoundFactory
&
CompoundFactory
::
instance
()
CompoundFactory
&
CompoundFactory
::
instance
()
{
if
(
sUseThreadLocalInstance
)
{
...
...
@@ -1321,20 +497,20 @@ void CompoundFactory::clear()
}
}
void
CompoundFactory
::
pushDictionary
(
const
std
::
string
&
inDictFile
)
void
CompoundFactory
::
pushDictionary
(
const
std
::
string
&
inDictFile
)
{
if
(
not
fs
::
exists
(
inDictFile
))
throw
std
::
runtime_error
(
"file not found: "
+
inDictFile
);
// ifstream file(inDictFile);
// if (not file.is_open())
// throw std::runtime_error("Could not open peptide list " + inDictFile);
// ifstream file(inDictFile);
// if (not file.is_open())
// throw std::runtime_error("Could not open peptide list " + inDictFile);
try
{
mImpl
=
new
CompoundFactoryImpl
(
inDictFile
,
mImpl
);
}
catch
(
const
std
::
exception
&
ex
)
catch
(
const
std
::
exception
&
ex
)
{
std
::
cerr
<<
"Error loading dictionary "
<<
inDictFile
<<
std
::
endl
;
throw
;
...
...
@@ -1348,39 +524,32 @@ void CompoundFactory::popDictionary()
}
// id is the three letter code
const
Compound
*
CompoundFactory
::
get
(
std
::
string
id
)
const
Compound
*
CompoundFactory
::
get
(
std
::
string
id
)
{
return
mImpl
->
get
(
id
);
}
const
Compound
*
CompoundFactory
::
create
(
std
::
string
id
)
const
Compound
*
CompoundFactory
::
create
(
std
::
string
id
)
{
return
mImpl
->
create
(
id
);
}
const
Link
*
CompoundFactory
::
getLink
(
std
::
string
id
)
{
return
mImpl
->
getLink
(
id
);
}
const
Link
*
CompoundFactory
::
createLink
(
std
::
string
id
)
{
return
mImpl
->
createLink
(
id
);
}
bool
CompoundFactory
::
isKnownPeptide
(
const
std
::
string
&
resName
)
const
bool
CompoundFactory
::
isKnownPeptide
(
const
std
::
string
&
resName
)
const
{
return
mImpl
->
isKnownPeptide
(
resName
);
}
bool
CompoundFactory
::
isKnownBase
(
const
std
::
string
&
resName
)
const
bool
CompoundFactory
::
isKnownBase
(
const
std
::
string
&
resName
)
const
{
return
mImpl
->
isKnownBase
(
resName
);
}
std
::
string
CompoundFactory
::
unalias
(
const
std
::
string
&
resName
)
const
// --------------------------------------------------------------------
std
::
vector
<
std
::
string
>
Compound
::
addExtraComponents
(
const
std
::
filesystem
::
path
&
components
)
{
return
mImpl
->
unalias
(
resName
);
}
}
}
// namespace mmcif
src/PDB2Cif.cpp
View file @
915d6504
...
...
@@ -457,7 +457,6 @@ class PDBFileParser
char
iCode
;
int
numHetAtoms
=
0
;
std
::
string
text
;
bool
sugar
=
false
;
std
::
string
asymID
;
std
::
vector
<
PDBRecord
*>
atoms
;
bool
processed
=
false
;
...
...
@@ -467,14 +466,6 @@ class PDBFileParser
HET
(
const
std
::
string
&
hetID
,
char
chainID
,
int
seqNum
,
char
iCode
,
int
numHetAtoms
=
0
,
const
std
::
string
&
text
=
{})
:
hetID
(
hetID
),
chainID
(
chainID
),
seqNum
(
seqNum
),
iCode
(
iCode
),
numHetAtoms
(
numHetAtoms
),
text
(
text
)
{
// just in case we don't have a CCP4 available
if
(
hetID
==
"MAN"
or
hetID
==
"BMA"
or
hetID
==
"NAG"
or
hetID
==
"NDG"
or
hetID
==
"FUC"
or
hetID
==
"FUL"
)
sugar
=
true
;
else
{
auto
compound
=
CompoundFactory
::
instance
().
create
(
hetID
);
sugar
=
compound
?
compound
->
isSugar
()
:
false
;
}
}
};
...
...
@@ -4374,33 +4365,38 @@ void PDBFileParser::ConstructEntities()
mMod2parent
.
count
(
cc
)
?
mMod2parent
[
cc
]
:
cc
);
std
::
string
formula
=
mFormuls
[
cc
];
if
(
formula
.
empty
()
and
compound
!=
nullptr
)
formula
=
compound
->
formula
();
else
{
const
std
::
regex
rx
(
R"(\d+\((.+)\))"
);
std
::
smatch
m
;
if
(
std
::
regex_match
(
formula
,
m
,
rx
))
formula
=
m
[
1
].
str
();
}
std
::
string
name
=
mHetnams
[
cc
];
if
(
name
.
empty
()
and
compound
!=
nullptr
)
name
=
compound
->
name
();
std
::
string
type
=
"other"
;
std
::
string
name
;
std
::
string
formula
;
std
::
string
type
;
std
::
string
nstd
=
"."
;
std
::
string
formulaWeight
;
if
(
compound
!=
nullptr
)
{
name
=
compound
->
name
();
type
=
compound
->
type
();
if
(
type
.
empty
())
type
=
"NON-POLYMER"
;
if
(
iequals
(
type
,
"L-peptide linking"
)
or
iequals
(
type
,
"peptide linking"
))
nstd
=
"y"
;
formula
=
compound
->
formula
();
formulaWeight
=
std
::
to_string
(
compound
->
formulaWeight
());
}
if
(
name
.
empty
())
name
=
mHetnams
[
cc
];
if
(
type
.
empty
())
type
=
"NON-POLYMER"
;
if
(
formula
.
empty
())
{
formula
=
mFormuls
[
cc
];
const
std
::
regex
rx
(
R"(\d+\((.+)\))"
);
std
::
smatch
m
;
if
(
std
::
regex_match
(
formula
,
m
,
rx
))
formula
=
m
[
1
].
str
();
}
if
(
modResSet
.
count
(
cc
))
...
...
@@ -4410,6 +4406,7 @@ void PDBFileParser::ConstructEntities()
{
"id"
,
cc
},
{
"name"
,
name
},
{
"formula"
,
formula
},
{
"formula_weight"
,
formulaWeight
},
{
"mon_nstd_flag"
,
nstd
},
{
"type"
,
type
}
});
...
...
src/Structure.cpp
View file @
915d6504
...
...
@@ -566,16 +566,6 @@ int Atom::charge() const
return
property
<
int
>
(
"pdbx_formal_charge"
);
}
std
::
string
Atom
::
energyType
()
const
{
std
::
string
result
;
if
(
impl
()
and
impl
()
->
mCompound
)
result
=
impl
()
->
mCompound
->
getAtomByID
(
impl
()
->
mAtomID
).
typeEnergy
;
return
result
;
}
float
Atom
::
uIso
()
const
{
float
result
;
...
...
@@ -1013,21 +1003,6 @@ bool Residue::isEntity() const
return
a1
.
size
()
==
a2
.
size
();
}
bool
Residue
::
isSugar
()
const
{
return
compound
().
isSugar
();
}
bool
Residue
::
isPyranose
()
const
{
return
cif
::
iequals
(
compound
().
group
(),
"pyranose"
);
}
bool
Residue
::
isFuranose
()
const
{
return
cif
::
iequals
(
compound
().
group
(),
"furanose"
);
}
std
::
string
Residue
::
authID
()
const
{
std
::
string
result
;
...
...
@@ -2101,7 +2076,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
{
auto
compound
=
Compound
::
create
(
compoundID
);
if
(
compound
==
nullptr
)
throw
std
::
runtime_error
(
"Trying to insert unknown compound "
+
compoundID
+
" (not found in CC
P4 monomers lib
)"
);
throw
std
::
runtime_error
(
"Trying to insert unknown compound "
+
compoundID
+
" (not found in CC
D
)"
);
cif
::
Datablock
&
db
=
*
mFile
.
impl
().
mDb
;
...
...
@@ -2113,6 +2088,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
{
"id"
,
compoundID
},
{
"name"
,
compound
->
name
()
},
{
"formula"
,
compound
->
formula
()
},
{
"formula_weight"
,
compound
->
formulaWeight
()
},
{
"type"
,
compound
->
type
()
}
});
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment