Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
libcifpp
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
libcifpp
Commits
915d6504
Unverified
Commit
915d6504
authored
Apr 13, 2021
by
Maarten L. Hekkelman
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
First steps, remove CCP4 info from compound
parent
5e63ca7a
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
399 additions
and
1393 deletions
+399
-1393
include/cif++/Cif++.hpp
+29
-7
include/cif++/Compound.hpp
+99
-250
include/cif++/Structure.hpp
+0
-7
src/Compound.cpp
+244
-1075
src/PDB2Cif.cpp
+25
-28
src/Structure.cpp
+2
-26
No files found.
include/cif++/Cif++.hpp
View file @
915d6504
...
@@ -289,9 +289,10 @@ namespace detail
...
@@ -289,9 +289,10 @@ namespace detail
void
swap
(
ItemReference
&
b
);
void
swap
(
ItemReference
&
b
);
template
<
typename
T
=
std
::
string
>
template
<
typename
T
=
std
::
string
>
T
as
()
const
auto
as
()
const
{
{
return
item_value_as
<
T
>::
convert
(
*
this
);
using
value_type
=
std
::
remove_cv_t
<
std
::
remove_reference_t
<
T
>>
;
return
item_value_as
<
value_type
>::
convert
(
*
this
);
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -337,11 +338,13 @@ namespace detail
...
@@ -337,11 +338,13 @@ namespace detail
template
<
typename
T
>
template
<
typename
T
>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_floating_point_v
<
T
>>>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_floating_point_v
<
T
>>>
{
{
static
T
convert
(
const
ItemReference
&
ref
)
using
value_type
=
std
::
remove_reference_t
<
std
::
remove_cv_t
<
T
>>
;
static
value_type
convert
(
const
ItemReference
&
ref
)
{
{
T
result
=
{};
value_type
result
=
{};
if
(
not
ref
.
empty
())
if
(
not
ref
.
empty
())
result
=
static_cast
<
T
>
(
std
::
stod
(
ref
.
c_str
()));
result
=
static_cast
<
value_type
>
(
std
::
stod
(
ref
.
c_str
()));
return
result
;
return
result
;
}
}
...
@@ -376,7 +379,7 @@ namespace detail
...
@@ -376,7 +379,7 @@ namespace detail
};
};
template
<
typename
T
>
template
<
typename
T
>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_integral_v
<
T
>
and
std
::
is_unsigned_v
<
T
>>>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_integral_v
<
T
>
and
std
::
is_unsigned_v
<
T
>
and
not
std
::
is_same_v
<
T
,
bool
>
>>
{
{
static
T
convert
(
const
ItemReference
&
ref
)
static
T
convert
(
const
ItemReference
&
ref
)
{
{
...
@@ -417,7 +420,7 @@ namespace detail
...
@@ -417,7 +420,7 @@ namespace detail
};
};
template
<
typename
T
>
template
<
typename
T
>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_integral_v
<
T
>
and
std
::
is_signed_v
<
T
>>>
struct
ItemReference
::
item_value_as
<
T
,
std
::
enable_if_t
<
std
::
is_integral_v
<
T
>
and
std
::
is_signed_v
<
T
>
and
not
std
::
is_same_v
<
T
,
bool
>
>>
{
{
static
T
convert
(
const
ItemReference
&
ref
)
static
T
convert
(
const
ItemReference
&
ref
)
{
{
...
@@ -482,6 +485,25 @@ namespace detail
...
@@ -482,6 +485,25 @@ namespace detail
}
}
};
};
/// \brief Specialisation of item_value_as used when the requested type is bool.
///
/// The stored text is interpreted as true only when iequals reports it
/// equal to "y"; an empty item and any other text give false.
template <typename T>
struct ItemReference::item_value_as<T, std::enable_if_t<std::is_same_v<T, bool>>>
{
	/// \brief Convert the text in \a ref to a bool.
	/// \return true when \a ref is not empty and iequals(ref.c_str(), "y") holds
	static bool convert(const ItemReference &ref)
	{
		bool result = false;
		if (not ref.empty())
			result = iequals(ref.c_str(), "y");
		return result;
	}

	/// \brief Three-way comparison of the bool stored in \a ref with \a value.
	///
	/// \param ref    The item to convert and compare
	/// \param value  The value to compare with
	/// \param icase  Not used by this specialisation
	/// \return 0 when both are equal, -1 when the stored value is false and
	///         \a value is true, 1 when the stored value is true and \a value is false
	static int compare(const ItemReference &ref, bool value, bool icase)
	{
		const bool rv = convert(ref);

		// Equality must hold for false == false as well; testing
		// 'value && rv' reported two false values as different.
		return rv == value ? 0 : (rv < value ? -1 : 1);
	}
};
template
<
size_t
N
>
template
<
size_t
N
>
struct
ItemReference
::
item_value_as
<
char
[
N
]
>
struct
ItemReference
::
item_value_as
<
char
[
N
]
>
{
{
...
...
include/cif++/Compound.hpp
View file @
915d6504
...
@@ -26,10 +26,13 @@
...
@@ -26,10 +26,13 @@
#pragma once
#pragma once
/// \file This file contains the definition for the class Compound, encapsulating
/// the information found for compounds in the CCD.
#include <map>
#include <set>
#include <set>
#include <tuple>
#include <tuple>
#include <vector>
#include <vector>
#include <map>
#include "cif++/AtomType.hpp"
#include "cif++/AtomType.hpp"
#include "cif++/Cif++.hpp"
#include "cif++/Cif++.hpp"
...
@@ -38,310 +41,156 @@ namespace mmcif
...
@@ -38,310 +41,156 @@ namespace mmcif
{
{
// --------------------------------------------------------------------
// --------------------------------------------------------------------
// The chemical composition of the structure in an mmCIF file is
// defined in the class composition. A composition consists of
// entities. Each Entity can be either a polymer, a non-polymer,
// a macrolide or a water molecule.
// Entities themselves are made up of compounds. And compounds
// contain CompoundAtom records for each atom.
class
Compound
;
class
Compound
;
class
Link
;
struct
CompoundAtom
;
struct
CompoundAtom
;
enum
BondType
{
singleBond
,
doubleBond
,
tripleBond
,
delocalizedBond
};
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx_v50 file
enum
class
BondType
// --------------------------------------------------------------------
{
// struct containing information about an atom in a chemical compound
sing
,
// 'single bond'
// This information comes from the CCP4 monomer library.
doub
,
// 'double bond'
trip
,
// 'triple bond'
struct
CompoundAtom
quad
,
// 'quadruple bond'
{
arom
,
// 'aromatic bond'
std
::
string
id
;
poly
,
// 'polymeric bond'
AtomType
typeSymbol
;
delo
,
// 'delocalized double bond'
std
::
string
typeEnergy
;
pi
,
// 'pi bond'
float
partialCharge
;
};
// --------------------------------------------------------------------
// struct containing information about the bonds
// This information comes from the CCP4 monomer library.
struct
CompoundBond
{
std
::
string
atomID
[
2
];
BondType
type
;
float
distance
;
float
esd
;
};
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct
CompoundAngle
{
std
::
string
atomID
[
3
];
float
angle
;
float
esd
;
};
};
// --------------------------------------------------------------------
std
::
string
to_string
(
BondType
bondType
);
// struct containing information about the bond-angles
BondType
from_string
(
const
std
::
string
&
bondType
);
// This information comes from the CCP4 monomer library.
struct
CompoundTorsion
/// --------------------------------------------------------------------
{
/// \brief struct containing information about an atom in a chemical compound.
std
::
string
atomID
[
4
];
/// This is a subset of the available information. Contact the author if you need more fields.
float
angle
;
float
esd
;
int
period
;
};
// --------------------------------------------------------------------
struct
CompoundAtom
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct
CompoundPlane
{
{
std
::
string
id
;
std
::
string
id
;
std
::
vector
<
std
::
string
>
atomID
;
AtomType
typeSymbol
;
float
esd
;
int
charge
;
bool
aromatic
;
bool
leavingAtom
;
bool
stereoConfig
;
float
x
,
y
,
z
;
};
};
// --------------------------------------------------------------------
/// --------------------------------------------------------------------
// struct containing information about a chiral centre
/// \brief struct containing information about the bonds
// This information comes from the CCP4 monomer library.
enum
ChiralVolumeSign
{
negativ
,
positiv
,
both
};
struct
Compound
ChiralCentre
struct
Compound
Bond
{
{
std
::
string
id
;
std
::
string
atomID
[
2
];
std
::
string
atomIDCentre
;
BondType
type
;
std
::
string
atomID
[
3
];
bool
aromatic
,
stereoConfig
;
ChiralVolumeSign
volumeSign
;
};
};
// --------------------------------------------------------------------
/// --------------------------------------------------------------------
// a class that contains information about a chemical compound.
/// \brief a class that contains information about a chemical compound.
// This information is derived from the ccp4 monomer library by default.
/// This information is derived from the CCD by default.
// To create compounds, you'd best use the factory method.
///
/// To create compounds, you use the factory method. You can add your own
/// compound definitions by calling the addExtraComponents function and
/// pass it a valid CCD formatted file.
class
Compound
class
Compound
{
{
public
:
public
:
Compound
(
cif
::
Datablock
&
db
);
Compound
(
const
std
::
string
&
file
,
const
std
::
string
&
id
,
const
std
::
string
&
name
,
const
std
::
string
&
group
);
// factory method, create a Compound based on the three letter code
// (for amino acids) or the one-letter code (for bases) or the
// code as it is known in the CCP4 monomer library.
static
const
Compound
*
create
(
const
std
::
string
&
id
);
// this second factory method can create a Compound even if it is not
// recorded in the library. It will take the values from the CCP4 lib
// unless the value passed to this function is not empty.
static
const
Compound
*
create
(
const
std
::
string
&
id
,
const
std
::
string
&
name
,
const
std
::
string
&
type
,
const
std
::
string
&
formula
);
// add an additional path to the monomer library.
static
void
addMonomerLibraryPath
(
const
std
::
string
&
dir
);
// accessors
std
::
string
id
()
const
{
return
mID
;
}
std
::
string
name
()
const
{
return
mName
;
}
std
::
string
type
()
const
;
std
::
string
group
()
const
{
return
mGroup
;
}
std
::
vector
<
CompoundAtom
>
atoms
()
const
{
return
mAtoms
;
}
std
::
vector
<
CompoundBond
>
bonds
()
const
{
return
mBonds
;
}
std
::
vector
<
CompoundAngle
>
angles
()
const
{
return
mAngles
;
}
std
::
vector
<
CompoundChiralCentre
>
chiralCentres
()
const
{
return
mChiralCentres
;
}
std
::
vector
<
CompoundPlane
>
planes
()
const
{
return
mPlanes
;
}
std
::
vector
<
CompoundTorsion
>
torsions
()
const
{
return
mTorsions
;
}
CompoundAtom
getAtomByID
(
const
std
::
string
&
atomID
)
const
;
bool
atomsBonded
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
;
float
atomBondValue
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
;
float
bondAngle
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
,
const
std
::
string
&
atomId_3
)
const
;
float
chiralVolume
(
const
std
::
string
&
centreID
)
const
;
std
::
string
formula
()
const
;
float
formulaWeight
()
const
;
int
charge
()
const
;
bool
isWater
()
const
;
bool
isSugar
()
const
;
std
::
vector
<
std
::
string
>
isomers
()
const
;
bool
isIsomerOf
(
const
Compound
&
c
)
const
;
std
::
vector
<
std
::
tuple
<
std
::
string
,
std
::
string
>>
mapToIsomer
(
const
Compound
&
c
)
const
;
private
:
~
Compound
();
~
Compound
();
cif
::
File
mCF
;
/// \brief factory method, create a Compound based on the three letter code
/// (for amino acids) or the one-letter code (for bases) or the
std
::
string
mID
;
/// code as it is known in the CCD.
std
::
string
mName
;
std
::
string
mGroup
;
std
::
vector
<
CompoundAtom
>
mAtoms
;
std
::
vector
<
CompoundBond
>
mBonds
;
std
::
vector
<
CompoundAngle
>
mAngles
;
std
::
vector
<
CompoundTorsion
>
mTorsions
;
std
::
vector
<
CompoundChiralCentre
>
mChiralCentres
;
std
::
vector
<
CompoundPlane
>
mPlanes
;
};
// --------------------------------------------------------------------
// struct containing information about the bonds
// This information comes from the CCP4 monomer library.
struct
LinkAtom
{
int
compID
;
std
::
string
atomID
;
bool
operator
==
(
const
LinkAtom
&
rhs
)
const
{
return
compID
==
rhs
.
compID
and
atomID
==
rhs
.
atomID
;
}
};
struct
LinkBond
{
LinkAtom
atom
[
2
];
BondType
type
;
float
distance
;
float
esd
;
};
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct
LinkAngle
{
LinkAtom
atom
[
3
];
float
angle
;
float
esd
;
};
// --------------------------------------------------------------------
// struct containing information about the bond-torsions
// This information comes from the CCP4 monomer library.
struct
LinkTorsion
static
const
Compound
*
create
(
const
std
::
string
&
id
);
{
LinkAtom
atom
[
4
];
float
angle
;
float
esd
;
int
period
;
};
// --------------------------------------------------------------------
// /// this second factory method can create a Compound even if it is not
// struct containing information about the bond-angles
// /// recorded in the library. It will take the values from the CCP4 lib
// This information comes from the CCP4 monomer library.
// /// unless the value passed to this function is not empty.
// static const Compound* create(const std::string& id, const std::string& name,
// const std::string& type, const std::string& formula);
struct
LinkPlane
/// \brief Create compounds based on the data in the file \a components
{
///
std
::
string
id
;
/// It is often required to add information about unknown components.
std
::
vector
<
LinkAtom
>
atoms
;
/// This function parses either a CCP4 or a CCD formatted components file
float
esd
;
///
};
/// \param components The mmCIF file containing the components
/// \result An array containing the ID's of the added components
static
std
::
vector
<
std
::
string
>
addExtraComponents
(
const
std
::
filesystem
::
path
&
components
);
// --------------------------------------------------------------------
// accessors
// struct containing information about a chiral centre
// This information comes from the CCP4 monomer library.
struct
LinkChiralCentre
std
::
string
id
()
const
{
return
mID
;
}
{
std
::
string
name
()
const
{
return
mName
;
}
std
::
string
id
;
std
::
string
type
()
const
{
return
mType
;
}
LinkAtom
atomCentre
;
std
::
string
formula
()
const
{
return
mFormula
;
}
LinkAtom
atom
[
3
];
float
formulaWeight
()
const
{
return
mFormulaWeight
;
}
ChiralVolumeSign
volumeSign
;
int
formalCharge
()
const
{
return
mFormalCharge
;
}
};
// --------------------------------------------------------------------
const
std
::
vector
<
CompoundAtom
>
&
atoms
()
const
{
return
mAtoms
;
}
// a class that contains information about a chemical link between compounds.
const
std
::
vector
<
CompoundBond
>
&
bonds
()
const
{
return
mBonds
;
}
// This information is derived from the ccp4 monomer library by default.
class
Link
CompoundAtom
getAtomByID
(
const
std
::
string
&
atomID
)
const
;
{
public
:
Link
(
cif
::
Datablock
&
db
);
bool
atomsBonded
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
;
// float atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const;
// float bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const;
// float chiralVolume(const std::string &centreID) const;
// Factory method.
bool
isWater
()
const
static
const
Link
&
create
(
const
std
::
string
&
id
);
{
return
mID
==
"HOH"
or
mID
==
"H2O"
or
mID
==
"WAT"
;
}
// accessors
std
::
string
id
()
const
{
return
mID
;
}
std
::
vector
<
LinkBond
>
bonds
()
const
{
return
mBonds
;
}
std
::
vector
<
LinkAngle
>
angles
()
const
{
return
mAngles
;
}
std
::
vector
<
LinkChiralCentre
>
chiralCentres
()
const
{
return
mChiralCentres
;
}
std
::
vector
<
LinkPlane
>
planes
()
const
{
return
mPlanes
;
}
std
::
vector
<
LinkTorsion
>
torsions
()
const
{
return
mTorsions
;
}
float
atomBondValue
(
const
LinkAtom
&
atomId_1
,
const
LinkAtom
&
atomId_2
)
const
;
float
bondAngle
(
const
LinkAtom
&
atomId_1
,
const
LinkAtom
&
atomId_2
,
const
LinkAtom
&
atomId_3
)
const
;
float
chiralVolume
(
const
std
::
string
&
id
)
const
;
private
:
private
:
~
Link
()
;
std
::
string
mID
;
std
::
string
mName
;
std
::
string
mID
;
std
::
string
mType
;
std
::
vector
<
LinkBond
>
mBonds
;
std
::
string
mFormula
;
std
::
vector
<
LinkAngle
>
mAngles
;
float
mFormulaWeight
;
std
::
vector
<
LinkTorsion
>
mTorsions
;
int
mFormalCharge
;
std
::
vector
<
LinkChiralCentre
>
mChiralCentre
s
;
std
::
vector
<
CompoundAtom
>
mAtom
s
;
std
::
vector
<
LinkPlane
>
mPlane
s
;
std
::
vector
<
CompoundBond
>
mBond
s
;
};
};
// --------------------------------------------------------------------
// --------------------------------------------------------------------
// Factory class for Compound and Link objects
// Factory class for Compound and Link objects
extern
const
std
::
map
<
std
::
string
,
char
>
kAAMap
,
kBaseMap
;
extern
const
std
::
map
<
std
::
string
,
char
>
kAAMap
,
kBaseMap
;
class
CompoundFactory
class
CompoundFactory
{
{
public
:
public
:
static
void
init
(
bool
useThreadLocalInstanceOnly
);
static
void
init
(
bool
useThreadLocalInstanceOnly
);
static
CompoundFactory
&
instance
();
static
CompoundFactory
&
instance
();
static
void
clear
();
static
void
clear
();
void
pushDictionary
(
const
std
::
string
&
inDictFile
);
void
pushDictionary
(
const
std
::
string
&
inDictFile
);
void
popDictionary
();
void
popDictionary
();
bool
isKnownPeptide
(
const
std
::
string
&
res_name
)
const
;
bool
isKnownPeptide
(
const
std
::
string
&
res_name
)
const
;
bool
isKnownBase
(
const
std
::
string
&
res_name
)
const
;
bool
isKnownBase
(
const
std
::
string
&
res_name
)
const
;
std
::
string
unalias
(
const
std
::
string
&
res_name
)
const
;
const
Compound
*
get
(
std
::
string
id
);
const
Compound
*
create
(
std
::
string
id
);
const
Compound
*
get
(
std
::
string
id
);
const
Compound
*
create
(
std
::
string
id
);
const
Link
*
getLink
(
std
::
string
id
);
const
Link
*
createLink
(
std
::
string
id
);
~
CompoundFactory
();
~
CompoundFactory
();
private
:
private
:
CompoundFactory
();
CompoundFactory
();
CompoundFactory
(
const
CompoundFactory
&
)
=
delete
;
CompoundFactory
(
const
CompoundFactory
&
)
=
delete
;
CompoundFactory
&
operator
=
(
const
CompoundFactory
&
)
=
delete
;
CompoundFactory
&
operator
=
(
const
CompoundFactory
&
)
=
delete
;
static
CompoundFactory
*
sInstance
;
static
CompoundFactory
*
sInstance
;
static
thread_local
std
::
unique_ptr
<
CompoundFactory
>
tlInstance
;
static
thread_local
std
::
unique_ptr
<
CompoundFactory
>
tlInstance
;
static
bool
sUseThreadLocalInstance
;
static
bool
sUseThreadLocalInstance
;
class
CompoundFactoryImpl
*
mImpl
;
class
CompoundFactoryImpl
*
mImpl
;
};
};
}
}
// namespace mmcif
include/cif++/Structure.hpp
View file @
915d6504
...
@@ -141,9 +141,6 @@ class Atom
...
@@ -141,9 +141,6 @@ class Atom
// access data in compound for this atom
// access data in compound for this atom
// the energy-type field
std
::
string
energyType
()
const
;
// convenience routine
// convenience routine
bool
isBackBone
()
const
bool
isBackBone
()
const
{
{
...
@@ -243,10 +240,6 @@ class Residue
...
@@ -243,10 +240,6 @@ class Residue
bool
isWater
()
const
{
return
mCompoundID
==
"HOH"
;
}
bool
isWater
()
const
{
return
mCompoundID
==
"HOH"
;
}
bool
isSugar
()
const
;
bool
isPyranose
()
const
;
bool
isFuranose
()
const
;
const
Structure
&
structure
()
const
{
return
*
mStructure
;
}
const
Structure
&
structure
()
const
{
return
*
mStructure
;
}
bool
empty
()
const
{
return
mStructure
==
nullptr
;
}
bool
empty
()
const
{
return
mStructure
==
nullptr
;
}
...
...
src/Compound.cpp
View file @
915d6504
...
@@ -29,8 +29,8 @@
...
@@ -29,8 +29,8 @@
#endif
#endif
#include <map>
#include <map>
#include <numeric>
#include <mutex>
#include <mutex>
#include <numeric>
#include <shared_mutex>
#include <shared_mutex>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string.hpp>
...
@@ -39,9 +39,9 @@
...
@@ -39,9 +39,9 @@
#include <fstream>
#include <fstream>
#include "cif++/Cif++.hpp"
#include "cif++/Cif++.hpp"
#include "cif++/Point.hpp"
#include "cif++/Compound.hpp"
#include "cif++/CifUtils.hpp"
#include "cif++/CifUtils.hpp"
#include "cif++/Compound.hpp"
#include "cif++/Point.hpp"
namespace
ba
=
boost
::
algorithm
;
namespace
ba
=
boost
::
algorithm
;
namespace
fs
=
std
::
filesystem
;
namespace
fs
=
std
::
filesystem
;
...
@@ -50,11 +50,41 @@ namespace mmcif
...
@@ -50,11 +50,41 @@ namespace mmcif
{
{
// --------------------------------------------------------------------
// --------------------------------------------------------------------
/// \brief Return the four (or two) letter code belonging to \a bondType.
///
/// \param bondType  The bond type to convert
/// \return The code, e.g. "sing" for BondType::sing
/// \throws std::invalid_argument when \a bondType is not one of the enumerators
std::string to_string(BondType bondType)
{
	switch (bondType)
	{
		case BondType::sing: return "sing";
		case BondType::doub: return "doub";
		case BondType::trip: return "trip";
		case BondType::quad: return "quad";
		case BondType::arom: return "arom";
		case BondType::poly: return "poly";
		case BondType::delo: return "delo";
		case BondType::pi:   return "pi";
	}

	// Only reached for a value outside the enumerators (e.g. the result of a
	// cast). Flowing off the end of a value-returning function is undefined
	// behaviour, so report the problem the same way from_string does.
	throw std::invalid_argument("Invalid bondType: " + std::to_string(static_cast<int>(bondType)));
}
/// \brief Translate a textual bond type code into the matching BondType.
///
/// Each known code is compared with \a bondType using cif::iequals, in the
/// order sing, doub, trip, quad, arom, poly, delo, pi.
///
/// \param bondType  The code to look up
/// \return The BondType belonging to the first matching code
/// \throws std::invalid_argument when none of the codes matches
BondType from_string(const std::string &bondType)
{
	const struct
	{
		const char *code;
		BondType value;
	} knownCodes[] = {
		{ "sing", BondType::sing },
		{ "doub", BondType::doub },
		{ "trip", BondType::trip },
		{ "quad", BondType::quad },
		{ "arom", BondType::arom },
		{ "poly", BondType::poly },
		{ "delo", BondType::delo },
		{ "pi", BondType::pi },
	};

	for (const auto &entry : knownCodes)
	{
		if (cif::iequals(bondType, entry.code))
			return entry.value;
	}

	throw std::invalid_argument("Invalid bondType: " + bondType);
}
// --------------------------------------------------------------------
// Compound helper classes
// Compound helper classes
struct
CompoundAtomLess
struct
CompoundAtomLess
{
{
bool
operator
()(
const
CompoundAtom
&
a
,
const
CompoundAtom
&
b
)
const
bool
operator
()(
const
CompoundAtom
&
a
,
const
CompoundAtom
&
b
)
const
{
{
int
d
=
a
.
id
.
compare
(
b
.
id
);
int
d
=
a
.
id
.
compare
(
b
.
id
);
if
(
d
==
0
)
if
(
d
==
0
)
...
@@ -65,512 +95,59 @@ struct CompoundAtomLess
...
@@ -65,512 +95,59 @@ struct CompoundAtomLess
struct
CompoundBondLess
struct
CompoundBondLess
{
{
bool
operator
()(
const
CompoundBond
&
a
,
const
CompoundBond
&
b
)
const
bool
operator
()(
const
CompoundBond
&
a
,
const
CompoundBond
&
b
)
const
{
{
int
d
=
a
.
atomID
[
0
].
compare
(
b
.
atomID
[
0
]);
int
d
=
a
.
atomID
[
0
].
compare
(
b
.
atomID
[
0
]);
if
(
d
==
0
)
if
(
d
==
0
)
d
=
a
.
atomID
[
1
].
compare
(
b
.
atomID
[
1
]);
d
=
a
.
atomID
[
1
].
compare
(
b
.
atomID
[
1
]);
if
(
d
==
0
)
if
(
d
==
0
)
d
=
a
.
type
-
b
.
type
;
d
=
static_cast
<
int
>
(
a
.
type
)
-
static_cast
<
int
>
(
b
.
type
)
;
return
d
<
0
;
return
d
<
0
;
}
}
};
};
// --------------------------------------------------------------------
// Isomers in the ccp4 dictionary
/// One group of compound IDs. IsomerDB fills a set from the ':' separated
/// fields of a single line of isomers.txt.
struct IsomerSet
{
	std::vector<std::string> compounds; ///< the compound IDs in this set
};
/// The collection of all IsomerSet records held by IsomerDB.
struct IsomerSets
{
	std::vector<IsomerSet> isomers; ///< one entry per isomer set
};
/// \brief Table of isomer sets, reachable only through instance().
class IsomerDB
{
  public:
	/// Return the single IsomerDB object.
	static IsomerDB &instance();

	/// Return the number of isomer sets that list \a compound.
	size_t count(const std::string &compound) const;

	/// Return the compound IDs of the first isomer set that lists \a compound,
	/// throws a std::runtime_error when no set lists it.
	const std::vector<std::string> &operator[](const std::string &compound) const;

  private:
	// private constructor, the object is created inside instance()
	IsomerDB();

	IsomerSets mData;
};
/// \brief Load the isomer sets from the file isomers.txt in CACHE_DIR.
///
/// Nothing is loaded when CACHE_DIR is not defined at compile time. When
/// the file does not exist a message is written to std::cerr and the
/// table stays empty. Each non-empty line of the file is split on ':' and
/// stored as one IsomerSet.
IsomerDB::IsomerDB()
{
#if defined(CACHE_DIR)
	fs::path isomersFile = fs::path(CACHE_DIR) / "isomers.txt";

	if (not fs::exists(isomersFile))
		std::cerr << "Could not locate isomers.txt in " CACHE_DIR << std::endl;
	else
	{
		std::ifstream is(isomersFile);

		std::string line;
		while (std::getline(is, line))
		{
			// ba::split returns one empty token for an empty line, so the
			// emptiness test on the split result alone never rejected blank
			// lines and they ended up as a set containing "".
			if (line.empty())
				continue;

			IsomerSet compounds;
			ba::split(compounds.compounds, line, ba::is_any_of(":"));

			if (not compounds.compounds.empty())
				mData.isomers.emplace_back(std::move(compounds));
		}
	}
#endif
}
/// \brief Return the IsomerDB object shared by all callers.
///
/// The object is a function-local static, so it is constructed the first
/// time this function is called.
IsomerDB &IsomerDB::instance()
{
	static IsomerDB theDatabase;
	return theDatabase;
}
/// \brief Count the isomer sets that contain \a compound.
///
/// \param compound  The compound ID to look for, compared for exact equality
/// \return The number of sets in which \a compound occurs
size_t IsomerDB::count(const std::string &compound) const
{
	size_t matches = 0;

	for (const auto &isomerSet : mData.isomers)
	{
		const auto &ids = isomerSet.compounds;

		if (std::find(ids.begin(), ids.end(), compound) == ids.end())
			continue;

		++matches;
	}

	return matches;
}
/// \brief Return the compound IDs of the first isomer set containing \a compound.
///
/// \param compound  The compound ID to look for, compared for exact equality
/// \return A reference to the list of IDs stored in the matching set
/// \throws std::runtime_error when no set contains \a compound
const std::vector<std::string> &IsomerDB::operator[](const std::string &compound) const
{
	const auto &sets = mData.isomers;

	const auto hit = std::find_if(sets.begin(), sets.end(),
		[&compound](const IsomerSet &candidate)
		{
			const auto &ids = candidate.compounds;
			return std::find(ids.begin(), ids.end(), compound) != ids.end();
		});

	if (hit == sets.end())
		throw std::runtime_error("No isomer set found containing " + compound);

	return hit->compounds;
}
// --------------------------------------------------------------------
// Brute force comparison of two structures, when they are isomers the
// mapping between the atoms of both is returned
// This is not an optimal solution, but it works well enough for now
/// One atom in the graph that StructuresAreIsomeric builds for a compound.
struct Node
{
	/// ID of the atom this node was created from
	std::string id;

	/// element of the atom
	AtomType symbol;

	/// bonds to atoms that are not hydrogen, stored as
	/// (index of the other node, type of the bond)
	std::vector<std::tuple<size_t, BondType>> links;

	/// number of bonds to hydrogen atoms, these are counted instead of linked
	size_t hydrogens = 0;
};
// Check to see if the nodes a[iA] and b[iB] are the start of a similar sub structure
bool
SubStructuresAreIsomeric
(
const
std
::
vector
<
Node
>&
a
,
const
std
::
vector
<
Node
>&
b
,
size_t
iA
,
size_t
iB
,
std
::
vector
<
bool
>
visitedA
,
std
::
vector
<
bool
>
visitedB
,
std
::
vector
<
std
::
tuple
<
std
::
string
,
std
::
string
>>&
outMapping
)
{
auto
&
na
=
a
[
iA
];
auto
&
nb
=
b
[
iB
];
size_t
N
=
na
.
links
.
size
();
assert
(
na
.
symbol
==
nb
.
symbol
);
assert
(
nb
.
links
.
size
()
==
N
);
// we're optimistic today
bool
result
=
true
;
visitedA
[
iA
]
=
true
;
visitedB
[
iB
]
=
true
;
// we now have two sets of links to compare.
// Compare each permutation of the second set of indices with the first
std
::
vector
<
size_t
>
ilb
(
N
);
iota
(
ilb
.
begin
(),
ilb
.
end
(),
0
);
for
(;;)
{
result
=
true
;
std
::
vector
<
std
::
tuple
<
std
::
string
,
std
::
string
>>
m
;
for
(
size_t
i
=
0
;
result
and
i
<
N
;
++
i
)
{
size_t
lA
,
lB
;
BondType
typeA
,
typeB
;
std
::
tie
(
lA
,
typeA
)
=
na
.
links
[
i
];
assert
(
lA
<
a
.
size
());
std
::
tie
(
lB
,
typeB
)
=
nb
.
links
[
ilb
[
i
]];
assert
(
lB
<
b
.
size
());
if
(
typeA
!=
typeB
or
visitedA
[
lA
]
!=
visitedB
[
lB
])
{
result
=
false
;
break
;
}
auto
&
la
=
a
[
lA
];
auto
&
lb
=
b
[
lB
];
if
(
la
.
symbol
!=
lb
.
symbol
or
la
.
hydrogens
!=
lb
.
hydrogens
or
la
.
links
.
size
()
!=
lb
.
links
.
size
())
{
result
=
false
;
break
;
}
if
(
not
visitedA
[
lA
])
result
=
SubStructuresAreIsomeric
(
a
,
b
,
lA
,
lB
,
visitedA
,
visitedB
,
m
);
}
if
(
result
)
{
outMapping
.
insert
(
outMapping
.
end
(),
m
.
begin
(),
m
.
end
());
break
;
}
if
(
not
next_permutation
(
ilb
.
begin
(),
ilb
.
end
()))
break
;
}
if
(
result
and
na
.
id
!=
nb
.
id
)
outMapping
.
emplace_back
(
na
.
id
,
nb
.
id
);
return
result
;
}
bool
StructuresAreIsomeric
(
std
::
vector
<
CompoundAtom
>
atomsA
,
const
std
::
vector
<
CompoundBond
>&
bondsA
,
std
::
vector
<
CompoundAtom
>
atomsB
,
const
std
::
vector
<
CompoundBond
>&
bondsB
,
std
::
vector
<
std
::
tuple
<
std
::
string
,
std
::
string
>>&
outMapping
)
{
assert
(
atomsA
.
size
()
==
atomsB
.
size
());
assert
(
bondsA
.
size
()
==
bondsB
.
size
());
std
::
vector
<
Node
>
a
,
b
;
std
::
map
<
std
::
string
,
size_t
>
ma
,
mb
;
for
(
auto
&
atomA
:
atomsA
)
{
ma
[
atomA
.
id
]
=
a
.
size
();
a
.
push_back
({
atomA
.
id
,
atomA
.
typeSymbol
});
}
for
(
auto
&
bondA
:
bondsA
)
{
size_t
atom1
=
ma
.
at
(
bondA
.
atomID
[
0
]);
size_t
atom2
=
ma
.
at
(
bondA
.
atomID
[
1
]);
if
(
a
[
atom2
].
symbol
==
H
)
a
[
atom1
].
hydrogens
+=
1
;
else
a
[
atom1
].
links
.
emplace_back
(
atom2
,
bondA
.
type
);
if
(
a
[
atom1
].
symbol
==
H
)
a
[
atom2
].
hydrogens
+=
1
;
else
a
[
atom2
].
links
.
emplace_back
(
atom1
,
bondA
.
type
);
}
for
(
auto
&
atomB
:
atomsB
)
{
mb
[
atomB
.
id
]
=
b
.
size
();
b
.
push_back
({
atomB
.
id
,
atomB
.
typeSymbol
});
}
for
(
auto
&
bondB
:
bondsB
)
{
size_t
atom1
=
mb
.
at
(
bondB
.
atomID
[
0
]);
size_t
atom2
=
mb
.
at
(
bondB
.
atomID
[
1
]);
if
(
b
[
atom2
].
symbol
==
H
)
b
[
atom1
].
hydrogens
+=
1
;
else
b
[
atom1
].
links
.
emplace_back
(
atom2
,
bondB
.
type
);
if
(
b
[
atom1
].
symbol
==
H
)
b
[
atom2
].
hydrogens
+=
1
;
else
b
[
atom2
].
links
.
emplace_back
(
atom1
,
bondB
.
type
);
}
size_t
N
=
atomsA
.
size
();
size_t
ia
=
0
;
bool
result
=
false
;
// try each atom in B to see if it can be traced to be similar to A starting at zero
for
(
size_t
ib
=
0
;
ib
<
N
;
++
ib
)
{
if
(
b
[
ib
].
symbol
!=
a
[
ia
].
symbol
or
a
[
ia
].
hydrogens
!=
b
[
ib
].
hydrogens
or
a
[
ia
].
links
.
size
()
!=
b
[
ib
].
links
.
size
())
continue
;
std
::
vector
<
bool
>
va
(
N
,
false
),
vb
(
N
,
false
);
if
(
SubStructuresAreIsomeric
(
a
,
b
,
ia
,
ib
,
va
,
vb
,
outMapping
))
{
result
=
true
;
break
;
}
}
return
result
;
}
// --------------------------------------------------------------------
// --------------------------------------------------------------------
// Compound
// Compound
Compound
::
Compound
(
const
std
::
string
&
file
,
const
std
::
string
&
id
,
Compound
::
Compound
(
cif
::
Datablock
&
db
)
const
std
::
string
&
name
,
const
std
::
string
&
group
)
:
mID
(
id
),
mName
(
name
),
mGroup
(
group
)
{
{
try
auto
&
chemComp
=
db
[
"chem_comp"
];
{
mCF
.
load
(
file
);
// locate the datablock
auto
&
db
=
mCF
[
"comp_"
+
id
];
auto
&
compoundAtoms
=
db
[
"chem_comp_atom"
];
for
(
auto
row
:
compoundAtoms
)
{
std
::
string
id
,
symbol
,
energy
;
float
charge
;
cif
::
tie
(
id
,
symbol
,
energy
,
charge
)
=
row
.
get
(
"atom_id"
,
"type_symbol"
,
"type_energy"
,
"partial_charge"
);
mAtoms
.
push_back
({
id
,
AtomTypeTraits
(
symbol
).
type
(),
energy
,
charge
});
}
sort
(
mAtoms
.
begin
(),
mAtoms
.
end
(),
CompoundAtomLess
());
auto
&
compBonds
=
db
[
"chem_comp_bond"
];
for
(
auto
row
:
compBonds
)
{
CompoundBond
b
;
std
::
string
type
,
aromatic
;
cif
::
tie
(
b
.
atomID
[
0
],
b
.
atomID
[
1
],
type
,
b
.
distance
,
b
.
esd
)
=
row
.
get
(
"atom_id_1"
,
"atom_id_2"
,
"type"
,
"value_dist"
,
"value_dist_esd"
);
using
cif
::
iequals
;
if
(
iequals
(
type
,
"single"
)
or
iequals
(
type
,
"sing"
))
b
.
type
=
singleBond
;
else
if
(
iequals
(
type
,
"double"
)
or
iequals
(
type
,
"doub"
))
b
.
type
=
doubleBond
;
else
if
(
iequals
(
type
,
"triple"
)
or
iequals
(
type
,
"trip"
))
b
.
type
=
tripleBond
;
else
if
(
iequals
(
type
,
"deloc"
)
or
iequals
(
type
,
"aromat"
)
or
iequals
(
type
,
"aromatic"
))
b
.
type
=
delocalizedBond
;
else
{
if
(
cif
::
VERBOSE
)
std
::
cerr
<<
"Unimplemented chem_comp_bond.type "
<<
type
<<
" in "
<<
id
<<
std
::
endl
;
b
.
type
=
singleBond
;
}
if
(
b
.
atomID
[
0
]
>
b
.
atomID
[
1
])
swap
(
b
.
atomID
[
0
],
b
.
atomID
[
1
]);
mBonds
.
push_back
(
b
);
}
sort
(
mBonds
.
begin
(),
mBonds
.
end
(),
CompoundBondLess
());
for
(
auto
row
:
db
[
"chem_comp_angle"
])
if
(
chemComp
.
size
()
!=
1
)
{
throw
std
::
runtime_error
(
"Invalid compound file, chem_comp should contain a single row"
);
CompoundAngle
a
;
cif
::
tie
(
a
.
atomID
[
0
],
a
.
atomID
[
1
],
a
.
atomID
[
2
],
a
.
angle
,
a
.
esd
)
=
row
.
get
(
"atom_id_1"
,
"atom_id_2"
,
"atom_id_3"
,
"value_angle"
,
"value_angle_esd"
);
mAngles
.
push_back
(
a
);
}
for
(
auto
row
:
db
[
"chem_comp_tor"
])
cif
::
tie
(
mID
,
mName
,
mType
,
mFormula
,
mFormulaWeight
,
mFormalCharge
)
=
{
chemComp
.
front
().
get
(
"id"
,
"name"
,
"type"
,
"formula"
,
"formula_weight"
,
"pdbx_formal_charge"
);
CompoundTorsion
a
;
cif
::
tie
(
a
.
atomID
[
0
],
a
.
atomID
[
1
],
a
.
atomID
[
2
],
a
.
atomID
[
3
],
a
.
angle
,
a
.
esd
,
a
.
period
)
=
row
.
get
(
"atom_id_1"
,
"atom_id_2"
,
"atom_id_3"
,
"atom_id_4"
,
"value_angle"
,
"value_angle_esd"
,
"period"
);
mTorsions
.
push_back
(
a
);
}
for
(
auto
row
:
db
[
"chem_comp_chir"
])
auto
&
chemCompAtom
=
db
[
"chem_comp_atom"
];
{
for
(
auto
row
:
chemCompAtom
)
CompoundChiralCentre
cc
;
std
::
string
volumeSign
;
cif
::
tie
(
cc
.
id
,
cc
.
atomIDCentre
,
cc
.
atomID
[
0
],
cc
.
atomID
[
1
],
cc
.
atomID
[
2
],
volumeSign
)
=
row
.
get
(
"id"
,
"atom_id_centre"
,
"atom_id_1"
,
"atom_id_2"
,
"atom_id_3"
,
"volume_sign"
);
if
(
volumeSign
==
"negativ"
or
volumeSign
==
"negative"
)
cc
.
volumeSign
=
negativ
;
else
if
(
volumeSign
==
"positiv"
or
volumeSign
==
"positive"
)
cc
.
volumeSign
=
positiv
;
else
if
(
volumeSign
==
"both"
)
cc
.
volumeSign
=
both
;
else
{
if
(
cif
::
VERBOSE
)
std
::
cerr
<<
"Unimplemented chem_comp_chir.volume_sign "
<<
volumeSign
<<
" in "
<<
id
<<
std
::
endl
;
continue
;
}
mChiralCentres
.
push_back
(
cc
);
}
auto
&
compPlanes
=
db
[
"chem_comp_plane_atom"
];
for
(
auto
row
:
compPlanes
)
{
std
::
string
atom_id
,
plane_id
;
float
esd
;
cif
::
tie
(
atom_id
,
plane_id
,
esd
)
=
row
.
get
(
"atom_id"
,
"plane_id"
,
"dist_esd"
);
auto
i
=
find_if
(
mPlanes
.
begin
(),
mPlanes
.
end
(),
[
&
](
auto
&
p
)
{
return
p
.
id
==
plane_id
;});
if
(
i
==
mPlanes
.
end
())
mPlanes
.
emplace_back
(
CompoundPlane
{
plane_id
,
{
atom_id
},
esd
});
else
i
->
atomID
.
push_back
(
atom_id
);
}
}
catch
(
const
std
::
exception
&
ex
)
{
{
std
::
cerr
<<
"Error loading ccp4 file for "
<<
id
<<
" from file "
<<
file
<<
std
::
endl
;
CompoundAtom
atom
;
throw
;
std
::
string
typeSymbol
;
cif
::
tie
(
atom
.
id
,
typeSymbol
,
atom
.
charge
,
atom
.
aromatic
,
atom
.
leavingAtom
,
atom
.
stereoConfig
,
atom
.
x
,
atom
.
y
,
atom
.
z
)
=
row
.
get
(
"id"
,
"type_symbol"
,
"charge"
,
"pdbx_aromatic_flag"
,
"pdbx_leaving_atom_flag"
,
"pdbx_stereo_config"
,
"model_Cartn_x"
,
"model_Cartn_y"
,
"model_Cartn_z"
);
atom
.
typeSymbol
=
AtomTypeTraits
(
typeSymbol
).
type
();
mAtoms
.
push_back
(
std
::
move
(
atom
));
}
}
}
std
::
string
Compound
::
formula
()
const
auto
&
chemCompBond
=
db
[
"chem_comp_bond"
];
{
for
(
auto
row
:
chemCompBond
)
std
::
string
result
;
std
::
map
<
std
::
string
,
uint32_t
>
atoms
;
float
chargeSum
=
0
;
for
(
auto
r
:
mAtoms
)
{
atoms
[
AtomTypeTraits
(
r
.
typeSymbol
).
symbol
()]
+=
1
;
chargeSum
+=
r
.
partialCharge
;
}
auto
c
=
atoms
.
find
(
"C"
);
if
(
c
!=
atoms
.
end
())
{
result
=
"C"
;
if
(
c
->
second
>
1
)
result
+=
std
::
to_string
(
c
->
second
);
atoms
.
erase
(
c
);
auto
h
=
atoms
.
find
(
"H"
);
if
(
h
!=
atoms
.
end
())
{
result
+=
" H"
;
if
(
h
->
second
>
1
)
result
+=
std
::
to_string
(
h
->
second
);
atoms
.
erase
(
h
);
}
}
for
(
auto
a
:
atoms
)
{
{
if
(
not
result
.
empty
())
CompoundBond
bond
;
result
+=
' '
;
std
::
string
valueOrder
;
cif
::
tie
(
bond
.
atomID
[
0
],
bond
.
atomID
[
1
],
valueOrder
,
bond
.
aromatic
,
bond
.
stereoConfig
)
result
+=
a
.
first
;
=
row
.
get
(
"atom_id_1"
,
"atom_id_2"
,
"value_order"
,
"pdbx_aromatic_flag"
,
"pdbx_stereo_config"
)
;
if
(
a
.
second
>
1
)
bond
.
type
=
from_string
(
valueOrder
);
result
+=
std
::
to_string
(
a
.
second
);
mBonds
.
push_back
(
std
::
move
(
bond
));
}
}
int
charge
=
lrint
(
chargeSum
);
if
(
charge
!=
0
)
result
+=
' '
+
std
::
to_string
(
charge
);
return
result
;
}
float
Compound
::
formulaWeight
()
const
{
float
result
=
0
;
for
(
auto
r
:
mAtoms
)
result
+=
AtomTypeTraits
(
r
.
typeSymbol
).
weight
();
return
result
;
}
int
Compound
::
charge
()
const
{
float
result
=
0
;
for
(
auto
r
:
mAtoms
)
result
+=
r
.
partialCharge
;
return
lrint
(
result
);
}
/// Maps this compound onto a chem_comp style type string.
///
/// The ID "gly" (case-insensitive) is special-cased; otherwise the result is
/// derived from mGroup. An empty string is returned for every group that is
/// not handled explicitly below.
std::string Compound::type() const
{
	// known groups are (counted from ccp4 monomer dictionary)
	//	D-pyranose
	//	DNA
	//	L-PEPTIDE LINKING
	//	L-SACCHARIDE
	//	L-peptide
	//	L-pyranose
	//	M-peptide
	//	NON-POLYMER
	//	P-peptide
	//	RNA
	//	furanose
	//	non-polymer
	//	non_polymer
	//	peptide
	//	pyranose
	//	saccharide

	if (cif::iequals(mID, "gly"))
		return "peptide linking";

	const bool isLPeptide =
		cif::iequals(mGroup, "l-peptide") or
		cif::iequals(mGroup, "L-peptide linking") or
		cif::iequals(mGroup, "peptide");
	if (isLPeptide)
		return "L-peptide linking";

	if (cif::iequals(mGroup, "DNA"))
		return "DNA linking";

	if (cif::iequals(mGroup, "RNA"))
		return "RNA linking";

	// Any other group is deliberately not passed through; the result stays empty.
	return {};
}
/// True when this compound's ID is exactly "HOH" or "H2O" (case-sensitive).
bool Compound::isWater() const
{
	if (mID == "HOH")
		return true;

	return mID == "H2O";
}
/// True when mGroup equals "furanose" or "pyranose", compared with cif::iequals.
bool Compound::isSugar() const
{
	if (cif::iequals(mGroup, "furanose"))
		return true;

	return cif::iequals(mGroup, "pyranose");
}
}
CompoundAtom
Compound
::
getAtomByID
(
const
std
::
string
&
atomID
)
const
CompoundAtom
Compound
::
getAtomByID
(
const
std
::
string
&
atomID
)
const
{
{
CompoundAtom
result
=
{};
CompoundAtom
result
=
{};
for
(
auto
&
a
:
mAtoms
)
for
(
auto
&
a
:
mAtoms
)
{
{
if
(
a
.
id
==
atomID
)
if
(
a
.
id
==
atomID
)
{
{
...
@@ -579,13 +156,13 @@ CompoundAtom Compound::getAtomByID(const std::string& atomID) const
...
@@ -579,13 +156,13 @@ CompoundAtom Compound::getAtomByID(const std::string& atomID) const
}
}
}
}
if
(
result
.
id
!=
atomID
)
if
(
result
.
id
!=
atomID
)
throw
std
::
out_of_range
(
"No atom "
+
atomID
+
" in Compound "
+
mID
);
throw
std
::
out_of_range
(
"No atom "
+
atomID
+
" in Compound "
+
mID
);
return
result
;
return
result
;
}
}
const
Compound
*
Compound
::
create
(
const
std
::
string
&
id
)
const
Compound
*
Compound
::
create
(
const
std
::
string
&
id
)
{
{
auto
result
=
CompoundFactory
::
instance
().
get
(
id
);
auto
result
=
CompoundFactory
::
instance
().
get
(
id
);
if
(
result
==
nullptr
)
if
(
result
==
nullptr
)
...
@@ -593,149 +170,44 @@ const Compound* Compound::create(const std::string& id)
...
@@ -593,149 +170,44 @@ const Compound* Compound::create(const std::string& id)
return
result
;
return
result
;
}
}
bool
Compound
::
atomsBonded
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
bool
Compound
::
atomsBonded
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
{
{
auto
i
=
find_if
(
mBonds
.
begin
(),
mBonds
.
end
(),
auto
i
=
find_if
(
mBonds
.
begin
(),
mBonds
.
end
(),
[
&
](
const
CompoundBond
&
b
)
[
&
](
const
CompoundBond
&
b
)
{
{
return
(
b
.
atomID
[
0
]
==
atomId_1
and
b
.
atomID
[
1
]
==
atomId_2
)
or
(
b
.
atomID
[
0
]
==
atomId_2
and
b
.
atomID
[
1
]
==
atomId_1
);
return
(
b
.
atomID
[
0
]
==
atomId_1
and
b
.
atomID
[
1
]
==
atomId_2
)
or
(
b
.
atomID
[
0
]
==
atomId_2
and
b
.
atomID
[
1
]
==
atomId_1
);
});
});
return
i
!=
mBonds
.
end
();
return
i
!=
mBonds
.
end
();
}
}
float
Compound
::
atomBondValue
(
const
std
::
string
&
atomId_1
,
const
std
::
string
&
atomId_2
)
const
// float Compound::atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const
{
// {
auto
i
=
find_if
(
mBonds
.
begin
(),
mBonds
.
end
(),
// auto i = find_if(mBonds.begin(), mBonds.end(),
[
&
](
const
CompoundBond
&
b
)
// [&](const CompoundBond &b) {
{
// return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
return
(
b
.
atomID
[
0
]
==
atomId_1
and
b
.
atomID
[
1
]
==
atomId_2
)
// });
or
(
b
.
atomID
[
0
]
==
atomId_2
and
b
.
atomID
[
1
]
==
atomId_1
);
});
return
i
!=
mBonds
.
end
()
?
i
->
distance
:
0
;
}
/// Decides whether this compound and @a c are isomers.
///
/// Cheap checks run first (same ID, equal atom/bond/chiral-centre counts,
/// equal per-element atom counts, position-wise identical atoms and bonds).
/// Only when those are inconclusive is StructuresAreIsomeric consulted.
/// When cif::VERBOSE is set and that call succeeds, the atom mapping it
/// produced is written to std::cerr.
bool Compound::isIsomerOf(const Compound &c) const
{
	// Same ID: nothing more to check.
	if (mID == c.mID)
		return true;

	// Differing counts rule out isomerism immediately.
	if (mAtoms.size() != c.mAtoms.size() or
		mBonds.size() != c.mBonds.size() or
		mChiralCentres.size() != c.mChiralCentres.size())
	{
		return false;
	}

	// Tally atoms per element on both sides and, in the same pass, note
	// whether the atoms agree position by position in id and element.
	std::map<AtomType, int> ownTypeCount, otherTypeCount;
	bool sameAtomNames = true;

	for (size_t ix = 0; ix < mAtoms.size(); ++ix)
	{
		const auto &mine = mAtoms[ix];
		const auto &theirs = c.mAtoms[ix];

		ownTypeCount[mine.typeSymbol] += 1;
		otherTypeCount[theirs.typeSymbol] += 1;

		if (mine.id != theirs.id or mine.typeSymbol != theirs.typeSymbol)
			sameAtomNames = false;
	}

	if (not sameAtomNames and ownTypeCount != otherTypeCount)
		return false;

	// Bonds are only compared position by position when the atoms matched;
	// the scan stops at the first difference.
	bool sameBonds = sameAtomNames;
	for (size_t ix = 0; sameBonds and ix < mBonds.size(); ++ix)
	{
		const auto &mine = mBonds[ix];
		const auto &theirs = c.mBonds[ix];

		sameBonds =
			mine.atomID[0] == theirs.atomID[0] and
			mine.atomID[1] == theirs.atomID[1] and
			mine.type == theirs.type;
	}

	if (sameBonds)
		return true;

	// Fall back to the structural comparison.
	std::vector<std::tuple<std::string, std::string>> mapping;
	const bool isomeric = StructuresAreIsomeric(mAtoms, mBonds, c.mAtoms, c.mBonds, mapping);

	if (cif::VERBOSE and isomeric)
	{
		for (auto &pairing : mapping)
			std::cerr << " " << std::get<0>(pairing) << " => " << std::get<1>(pairing) << std::endl;
	}

	return isomeric;
}
/// Returns the atom-name mapping produced by StructuresAreIsomeric for this
/// compound and @a c.
///
/// @throws std::runtime_error when StructuresAreIsomeric reports that the two
///         compounds are not isomers.
std::vector<std::tuple<std::string, std::string>> Compound::mapToIsomer(const Compound &c) const
{
	std::vector<std::tuple<std::string, std::string>> mapping;

	if (StructuresAreIsomeric(mAtoms, mBonds, c.mAtoms, c.mBonds, mapping))
		return mapping;

	throw std::runtime_error("Compounds " + id() + " and " + c.id() + " are not isomers in call to mapToIsomer");
}
/// Returns the IDs of the isomers of this compound as listed in IsomerDB,
/// with this compound's own ID removed. The result is empty when IsomerDB
/// has no entry for mID.
std::vector<std::string> Compound::isomers() const
{
	std::vector<std::string> result;

	auto &db = IsomerDB::instance();
	if (db.count(mID))
	{
		result = db[mID];

		// The list is expected to contain this compound itself.
		auto self = find(result.begin(), result.end(), mID);
		assert(self != result.end());

		// With NDEBUG the assert above disappears, and erasing end() is
		// undefined behaviour, so only erase when the entry was really found.
		if (self != result.end())
			result.erase(self);
	}

	return result;
}
/// Looks up the stored angle at @a atomId_2 between @a atomId_1 and @a atomId_3.
///
/// The two outer atoms may appear in either order in the stored entry. The
/// first matching entry in mAngles wins; nanf("1") is returned when none matches.
float Compound::bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const
{
	for (auto &entry : mAngles)
	{
		const bool centreMatches = entry.atomID[1] == atomId_2;
		const bool forward = entry.atomID[0] == atomId_1 and entry.atomID[2] == atomId_3;
		const bool reverse = entry.atomID[2] == atomId_1 and entry.atomID[0] == atomId_3;

		if (centreMatches and (forward or reverse))
			return entry.angle;
	}

	return nanf("1");
}
//static float calcC(float a, float b, float alpha)
//static float calcC(float a, float b, float alpha)
//{
//{
...
@@ -743,288 +215,85 @@ float Compound::bondAngle(const std::string& atomId_1, const std::string& atomId
...
@@ -743,288 +215,85 @@ float Compound::bondAngle(const std::string& atomId_1, const std::string& atomId
// float d = sqrt(b * b - f * f);
// float d = sqrt(b * b - f * f);
// float e = a - d;
// float e = a - d;
// float c = sqrt(f * f + e * e);
// float c = sqrt(f * f + e * e);
//
//
// return c;
// return c;
//}
//}
/// Computes the expected chiral volume for the chiral centre named @a centreID.
///
/// The three bond lengths from the centre atom and the three angles between
/// those bonds are looked up with atomBondValue and bondAngle, and combined
/// into the volume of the tetrahedron they span. The value is negated when
/// the centre's volumeSign is negativ. Only the first centre whose id matches
/// is used; 0 is returned when no centre matches.
float Compound::chiralVolume(const std::string &centreID) const
{
	for (auto &centre : mChiralCentres)
	{
		if (centre.id != centreID)
			continue;

		// the edges
		float a = atomBondValue(centre.atomIDCentre, centre.atomID[0]);
		float b = atomBondValue(centre.atomIDCentre, centre.atomID[1]);
		float c = atomBondValue(centre.atomIDCentre, centre.atomID[2]);

		// the angles for the top of the tetrahedron
		float alpha = bondAngle(centre.atomID[0], centre.atomIDCentre, centre.atomID[1]);
		float beta = bondAngle(centre.atomID[1], centre.atomIDCentre, centre.atomID[2]);
		float gamma = bondAngle(centre.atomID[2], centre.atomIDCentre, centre.atomID[0]);

		// the stored angles are scaled by kPI / 180 before taking the cosine
		float cosa = cos(alpha * kPI / 180);
		float cosb = cos(beta * kPI / 180);
		float cosc = cos(gamma * kPI / 180);

		float volume = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;

		if (centre.volumeSign == negativ)
			return -volume;

		return volume;
	}

	return 0;
}
// --------------------------------------------------------------------
// for (auto &cv : mChiralCentres)
// {
// if (cv.id != centreID)
// continue;
/// Builds a Link from a single datablock.
///
/// The link ID is the datablock name. Bonds, angles, torsions, chiral centres
/// and planes are read from the categories chem_link_bond, chem_link_angle,
/// chem_link_tor, chem_link_chir and chem_link_plane respectively.
///
/// - An unrecognised bond type is stored as singleBond (reported on std::cerr
///   when cif::VERBOSE is set).
/// - A chiral centre with an unrecognised volume_sign is skipped (also
///   reported when cif::VERBOSE is set).
/// - Bonds are stored in file order; they are neither normalised nor sorted.
Link::Link(cif::Datablock &db)
{
	mID = db.getName();

	// ---- bonds
	auto &linkBonds = db["chem_link_bond"];

	for (auto row : linkBonds)
	{
		LinkBond b;
		std::string type;

		cif::tie(b.atom[0].compID, b.atom[0].atomID,
			b.atom[1].compID, b.atom[1].atomID, type, b.distance, b.esd) =
			row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
				"type", "value_dist", "value_dist_esd");

		using cif::iequals;

		// Both the long and the abbreviated spellings are accepted.
		if (iequals(type, "single") or iequals(type, "sing"))
			b.type = singleBond;
		else if (iequals(type, "double") or iequals(type, "doub"))
			b.type = doubleBond;
		else if (iequals(type, "triple") or iequals(type, "trip"))
			b.type = tripleBond;
		else if (iequals(type, "deloc") or iequals(type, "aromat") or iequals(type, "aromatic"))
			b.type = delocalizedBond;
		else
		{
			if (cif::VERBOSE)
				std::cerr << "Unimplemented chem_link_bond.type " << type << " in " << mID << std::endl;
			b.type = singleBond;
		}

		mBonds.push_back(b);
	}

	// ---- angles
	auto &linkAngles = db["chem_link_angle"];

	for (auto row : linkAngles)
	{
		LinkAngle a;

		cif::tie(a.atom[0].compID, a.atom[0].atomID, a.atom[1].compID, a.atom[1].atomID,
			a.atom[2].compID, a.atom[2].atomID, a.angle, a.esd) =
			row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
				"atom_3_comp_id", "atom_id_3", "value_angle", "value_angle_esd");

		mAngles.push_back(a);
	}

	// ---- torsions
	for (auto row : db["chem_link_tor"])
	{
		LinkTorsion a;

		cif::tie(a.atom[0].compID, a.atom[0].atomID, a.atom[1].compID, a.atom[1].atomID,
			a.atom[2].compID, a.atom[2].atomID, a.atom[3].compID, a.atom[3].atomID,
			a.angle, a.esd, a.period) =
			row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
				"atom_3_comp_id", "atom_id_3", "atom_4_comp_id", "atom_id_4",
				"value_angle", "value_angle_esd", "period");

		mTorsions.push_back(a);
	}

	// ---- chiral centres
	auto &linkChir = db["chem_link_chir"];

	for (auto row : linkChir)
	{
		LinkChiralCentre cc;
		std::string volumeSign;

		cif::tie(cc.id, cc.atomCentre.compID, cc.atomCentre.atomID,
			cc.atom[0].compID, cc.atom[0].atomID,
			cc.atom[1].compID, cc.atom[1].atomID,
			cc.atom[2].compID, cc.atom[2].atomID,
			volumeSign) =
			row.get("id", "atom_centre_comp_id", "atom_id_centre",
				"atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
				"atom_3_comp_id", "atom_id_3", "volume_sign");

		if (volumeSign == "negativ" or volumeSign == "negative")
			cc.volumeSign = negativ;
		else if (volumeSign == "positiv" or volumeSign == "positive")
			cc.volumeSign = positiv;
		else if (volumeSign == "both")
			cc.volumeSign = both;
		else
		{
			if (cif::VERBOSE)
				std::cerr << "Unimplemented chem_link_chir.volume_sign " << volumeSign << " in " << mID << std::endl;
			continue; // centres with an unknown sign are not stored
		}

		mChiralCentres.push_back(cc);
	}

	// ---- planes: each row contributes one atom to the plane named plane_id
	auto &linkPlanes = db["chem_link_plane"];

	for (auto row : linkPlanes)
	{
		int compID;
		std::string atomID, planeID;
		float esd;

		cif::tie(planeID, compID, atomID, esd) =
			row.get("plane_id", "atom_comp_id", "atom_id", "dist_esd");

		auto i = find_if(mPlanes.begin(), mPlanes.end(), [&](auto &p) { return p.id == planeID; });
		if (i == mPlanes.end())
		{
			// First atom seen for this plane: create it. The esd of this
			// first row is the one recorded for the whole plane.
			std::vector<LinkAtom> atoms{LinkAtom{compID, atomID}};
			mPlanes.emplace_back(LinkPlane{planeID, std::move(atoms), esd});
		}
		else
			i->atoms.push_back({compID, atomID}); // esd of later rows is not stored
	}
}
/// Returns the Link with the given id, obtained from CompoundFactory.
///
/// An existing link is looked up with getLink first; only when that yields
/// nullptr is createLink tried.
///
/// @throws std::runtime_error when neither call yields a link.
const Link &Link::create(const std::string &id)
{
	auto link = CompoundFactory::instance().getLink(id);

	if (link == nullptr)
	{
		link = CompoundFactory::instance().createLink(id);

		if (link == nullptr)
			throw std::runtime_error("Link with id " + id + " not found");
	}

	return *link;
}
/// Destructor; performs no work beyond destroying the members.
Link::~Link() = default;
float
Link
::
atomBondValue
(
const
LinkAtom
&
atom1
,
const
LinkAtom
&
atom2
)
const
// if (cv.volumeSign == negativ)
{
// result = -result;
auto
i
=
find_if
(
mBonds
.
begin
(),
mBonds
.
end
(),
[
&
](
auto
&
b
)
{
return
(
b
.
atom
[
0
]
==
atom1
and
b
.
atom
[
1
]
==
atom2
)
or
(
b
.
atom
[
0
]
==
atom2
and
b
.
atom
[
1
]
==
atom1
);
});
return
i
!=
mBonds
.
end
()
?
i
->
distance
:
0
;
}
/// Looks up the stored angle at @a atom2 between @a atom1 and @a atom3.
///
/// The two outer atoms may appear in either order in the stored entry. The
/// first matching entry in mAngles wins; nanf("1") is returned when none matches.
float Link::bondAngle(const LinkAtom &atom1, const LinkAtom &atom2, const LinkAtom &atom3) const
{
	for (auto &entry : mAngles)
	{
		const bool centreMatches = entry.atom[1] == atom2;
		const bool forward = entry.atom[0] == atom1 and entry.atom[2] == atom3;
		const bool reverse = entry.atom[2] == atom1 and entry.atom[0] == atom3;

		if (centreMatches and (forward or reverse))
			return entry.angle;
	}

	return nanf("1");
}
/// Computes the expected chiral volume for the chiral centre named @a centreID.
///
/// The three bond lengths from the centre atom and the three angles between
/// those bonds are looked up with atomBondValue and bondAngle, and combined
/// into the volume of the tetrahedron they span. The value is negated when
/// the centre's volumeSign is negativ. Only the first centre whose id matches
/// is used; 0 is returned when no centre matches.
float Link::chiralVolume(const std::string &centreID) const
{
	for (auto &centre : mChiralCentres)
	{
		if (centre.id != centreID)
			continue;

		// the edges
		float a = atomBondValue(centre.atomCentre, centre.atom[0]);
		float b = atomBondValue(centre.atomCentre, centre.atom[1]);
		float c = atomBondValue(centre.atomCentre, centre.atom[2]);

		// the angles for the top of the tetrahedron
		float alpha = bondAngle(centre.atom[0], centre.atomCentre, centre.atom[1]);
		float beta = bondAngle(centre.atom[1], centre.atomCentre, centre.atom[2]);
		float gamma = bondAngle(centre.atom[2], centre.atomCentre, centre.atom[0]);

		// the stored angles are scaled by kPI / 180 before taking the cosine
		float cosa = cos(alpha * kPI / 180);
		float cosb = cos(beta * kPI / 180);
		float cosc = cos(gamma * kPI / 180);

		float volume = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;

		if (centre.volumeSign == negativ)
			return -volume;

		return volume;
	}

	return 0;
}
// --------------------------------------------------------------------
// --------------------------------------------------------------------
// a factory class to generate compounds
// a factory class to generate compounds
const
std
::
map
<
std
::
string
,
char
>
kAAMap
{
const
std
::
map
<
std
::
string
,
char
>
kAAMap
{
{
"ALA"
,
'A'
},
{
"ALA"
,
'A'
},
{
"ARG"
,
'R'
},
{
"ARG"
,
'R'
},
{
"ASN"
,
'N'
},
{
"ASN"
,
'N'
},
{
"ASP"
,
'D'
},
{
"ASP"
,
'D'
},
{
"CYS"
,
'C'
},
{
"CYS"
,
'C'
},
{
"GLN"
,
'Q'
},
{
"GLN"
,
'Q'
},
{
"GLU"
,
'E'
},
{
"GLU"
,
'E'
},
{
"GLY"
,
'G'
},
{
"GLY"
,
'G'
},
{
"HIS"
,
'H'
},
{
"HIS"
,
'H'
},
{
"ILE"
,
'I'
},
{
"ILE"
,
'I'
},
{
"LEU"
,
'L'
},
{
"LEU"
,
'L'
},
{
"LYS"
,
'K'
},
{
"LYS"
,
'K'
},
{
"MET"
,
'M'
},
{
"MET"
,
'M'
},
{
"PHE"
,
'F'
},
{
"PHE"
,
'F'
},
{
"PRO"
,
'P'
},
{
"PRO"
,
'P'
},
{
"SER"
,
'S'
},
{
"SER"
,
'S'
},
{
"THR"
,
'T'
},
{
"THR"
,
'T'
},
{
"TRP"
,
'W'
},
{
"TRP"
,
'W'
},
{
"TYR"
,
'Y'
},
{
"TYR"
,
'Y'
},
{
"VAL"
,
'V'
},
{
"VAL"
,
'V'
},
{
"GLX"
,
'Z'
},
{
"GLX"
,
'Z'
},
{
"ASX"
,
'B'
}
{
"ASX"
,
'B'
}};
};
const
std
::
map
<
std
::
string
,
char
>
kBaseMap
{
const
std
::
map
<
std
::
string
,
char
>
kBaseMap
{
{
"A"
,
'A'
},
{
"A"
,
'A'
},
{
"C"
,
'C'
},
{
"C"
,
'C'
},
{
"G"
,
'G'
},
{
"G"
,
'G'
},
{
"T"
,
'T'
},
{
"T"
,
'T'
},
{
"U"
,
'U'
},
{
"U"
,
'U'
},
{
"DA"
,
'A'
},
{
"DA"
,
'A'
},
{
"DC"
,
'C'
},
{
"DC"
,
'C'
},
{
"DG"
,
'G'
},
{
"DG"
,
'G'
},
{
"DT"
,
'T'
}};
{
"DT"
,
'T'
}
};
// --------------------------------------------------------------------
// --------------------------------------------------------------------
...
@@ -1033,20 +302,17 @@ class CompoundFactoryImpl
...
@@ -1033,20 +302,17 @@ class CompoundFactoryImpl
public
:
public
:
CompoundFactoryImpl
();
CompoundFactoryImpl
();
CompoundFactoryImpl
(
const
std
::
string
&
file
,
CompoundFactoryImpl
*
next
);
CompoundFactoryImpl
(
const
std
::
string
&
file
,
CompoundFactoryImpl
*
next
);
~
CompoundFactoryImpl
()
~
CompoundFactoryImpl
()
{
{
delete
mNext
;
delete
mNext
;
}
}
Compound
*
get
(
std
::
string
id
);
Compound
*
get
(
std
::
string
id
);
Compound
*
create
(
std
::
string
id
);
Compound
*
create
(
std
::
string
id
);
const
Link
*
getLink
(
std
::
string
id
);
CompoundFactoryImpl
*
pop
()
const
Link
*
createLink
(
std
::
string
id
);
CompoundFactoryImpl
*
pop
()
{
{
auto
result
=
mNext
;
auto
result
=
mNext
;
mNext
=
nullptr
;
mNext
=
nullptr
;
...
@@ -1054,93 +320,65 @@ class CompoundFactoryImpl
...
@@ -1054,93 +320,65 @@ class CompoundFactoryImpl
return
result
;
return
result
;
}
}
std
::
string
unalias
(
const
std
::
string
&
resName
)
const
bool
isKnownPeptide
(
const
std
::
string
&
resName
)
{
std
::
string
result
=
resName
;
auto
&
e
=
const_cast
<
cif
::
File
&>
(
mFile
)[
"comp_synonym_list"
];
for
(
auto
&
synonym
:
e
[
"chem_comp_synonyms"
])
{
if
(
ba
::
iequals
(
synonym
[
"comp_alternative_id"
].
as
<
std
::
string
>
(),
resName
)
==
false
)
continue
;
result
=
synonym
[
"comp_id"
].
as
<
std
::
string
>
();
ba
::
trim
(
result
);
break
;
}
if
(
result
.
empty
()
and
mNext
)
result
=
mNext
->
unalias
(
resName
);
return
result
;
}
bool
isKnownPeptide
(
const
std
::
string
&
resName
)
{
{
return
mKnownPeptides
.
count
(
resName
)
or
return
mKnownPeptides
.
count
(
resName
)
or
(
mNext
!=
nullptr
and
mNext
->
isKnownPeptide
(
resName
));
(
mNext
!=
nullptr
and
mNext
->
isKnownPeptide
(
resName
));
}
}
bool
isKnownBase
(
const
std
::
string
&
resName
)
bool
isKnownBase
(
const
std
::
string
&
resName
)
{
{
return
mKnownBases
.
count
(
resName
)
or
return
mKnownBases
.
count
(
resName
)
or
(
mNext
!=
nullptr
and
mNext
->
isKnownBase
(
resName
));
(
mNext
!=
nullptr
and
mNext
->
isKnownBase
(
resName
));
}
}
private
:
private
:
std
::
shared_timed_mutex
mMutex
;
std
::
shared_timed_mutex
mMutex
;
std
::
string
mPath
;
std
::
string
mPath
;
std
::
vector
<
Compound
*>
mCompounds
;
std
::
vector
<
Compound
*>
mCompounds
;
std
::
vector
<
const
Link
*>
mLinks
;
std
::
set
<
std
::
string
>
mKnownPeptides
;
std
::
set
<
std
::
string
>
mKnownPeptides
;
std
::
set
<
std
::
string
>
mKnownBases
;
std
::
set
<
std
::
string
>
mKnownBases
;
std
::
set
<
std
::
string
>
mMissing
;
std
::
set
<
std
::
string
>
mMissing
;
CompoundFactoryImpl
*
mNext
=
nullptr
;
cif
::
File
mFile
;
CompoundFactoryImpl
*
mNext
=
nullptr
;
};
};
// --------------------------------------------------------------------
// --------------------------------------------------------------------
CompoundFactoryImpl
::
CompoundFactoryImpl
()
CompoundFactoryImpl
::
CompoundFactoryImpl
()
{
{
for
(
const
auto
&
[
key
,
value
]
:
kAAMap
)
for
(
const
auto
&
[
key
,
value
]
:
kAAMap
)
mKnownPeptides
.
insert
(
key
);
mKnownPeptides
.
insert
(
key
);
for
(
const
auto
&
[
key
,
value
]
:
kBaseMap
)
for
(
const
auto
&
[
key
,
value
]
:
kBaseMap
)
mKnownBases
.
insert
(
key
);
mKnownBases
.
insert
(
key
);
}
}
CompoundFactoryImpl
::
CompoundFactoryImpl
(
const
std
::
string
&
file
,
CompoundFactoryImpl
*
next
)
CompoundFactoryImpl
::
CompoundFactoryImpl
(
const
std
::
string
&
file
,
CompoundFactoryImpl
*
next
)
:
mPath
(
file
),
mFile
(
file
),
mNext
(
next
)
:
mPath
(
file
)
,
mNext
(
next
)
{
{
const
std
::
regex
peptideRx
(
"(?:[lmp]-)?peptide"
,
std
::
regex
::
icase
);
cif
::
File
cifFile
(
file
);
if
(
not
cifFile
.
isValid
())
throw
std
::
runtime_error
(
"Invalid compound file"
);
auto
&
cat
=
mFile
.
firstDatablock
()[
"chem_comp"
];
for
(
auto
&
db
:
cifFile
)
for
(
auto
&
chemComp
:
cat
)
{
{
std
::
string
group
,
threeLetterCode
;
auto
compound
=
std
::
make_unique
<
Compound
>
(
db
);
cif
::
tie
(
group
,
threeLetterCode
)
=
chemComp
.
get
(
"group"
,
"three_letter_code"
);
mCompounds
.
push_back
(
compound
.
release
());
if
(
std
::
regex_match
(
group
,
peptideRx
))
mKnownPeptides
.
insert
(
threeLetterCode
);
else
if
(
ba
::
iequals
(
group
,
"DNA"
)
or
ba
::
iequals
(
group
,
"RNA"
))
mKnownBases
.
insert
(
threeLetterCode
);
}
}
}
}
Compound
*
CompoundFactoryImpl
::
get
(
std
::
string
id
)
Compound
*
CompoundFactoryImpl
::
get
(
std
::
string
id
)
{
{
std
::
shared_lock
lock
(
mMutex
);
std
::
shared_lock
lock
(
mMutex
);
ba
::
to_upper
(
id
);
ba
::
to_upper
(
id
);
Compound
*
result
=
nullptr
;
Compound
*
result
=
nullptr
;
for
(
auto
cmp
:
mCompounds
)
for
(
auto
cmp
:
mCompounds
)
{
{
if
(
cmp
->
id
()
==
id
)
if
(
cmp
->
id
()
==
id
)
{
{
...
@@ -1148,119 +386,72 @@ Compound* CompoundFactoryImpl::get(std::string id)
...
@@ -1148,119 +386,72 @@ Compound* CompoundFactoryImpl::get(std::string id)
break
;
break
;
}
}
}
}
if
(
result
==
nullptr
and
mNext
!=
nullptr
)
if
(
result
==
nullptr
and
mNext
!=
nullptr
)
result
=
mNext
->
get
(
id
);
result
=
mNext
->
get
(
id
);
return
result
;
return
result
;
}
}
Compound
*
CompoundFactoryImpl
::
create
(
std
::
string
id
)
Compound
*
CompoundFactoryImpl
::
create
(
std
::
string
id
)
{
{
ba
::
to_upper
(
id
);
ba
::
to_upper
(
id
);
Compound
*
result
=
get
(
id
);
Compound
*
result
=
get
(
id
);
if
(
result
==
nullptr
and
mMissing
.
count
(
id
)
==
0
and
not
mFile
.
empty
())
//
if (result == nullptr and mMissing.count(id) == 0 and not mFile.empty())
{
//
{
std
::
unique_lock
lock
(
mMutex
);
//
std::unique_lock lock(mMutex);
auto
&
cat
=
mFile
.
firstDatablock
()[
"chem_comp"
];
// auto &
cat = mFile.firstDatablock()["chem_comp"];
auto
rs
=
cat
.
find
(
cif
::
Key
(
"three_letter_code"
)
==
id
);
// auto rs = cat.find(cif::Key("three_letter_code") == id);
if
(
not
rs
.
empty
())
{
auto
row
=
rs
.
front
();
std
::
string
name
,
group
;
uint32_t
numberAtomsAll
,
numberAtomsNh
;
cif
::
tie
(
name
,
group
,
numberAtomsAll
,
numberAtomsNh
)
=
row
.
get
(
"name"
,
"group"
,
"number_atoms_all"
,
"number_atoms_nh"
);
ba
::
trim
(
name
);
ba
::
trim
(
group
);
if
(
mFile
.
get
(
"comp_"
+
id
)
==
nullptr
)
{
auto
clibd_mon
=
fs
::
path
(
getenv
(
"CLIBD_MON"
));
fs
::
path
resFile
=
clibd_mon
/
ba
::
to_lower_copy
(
id
.
substr
(
0
,
1
))
/
(
id
+
".cif"
);
if
(
not
fs
::
exists
(
resFile
)
and
(
id
==
"COM"
or
id
==
"CON"
or
"PRN"
))
// seriously...
resFile
=
clibd_mon
/
ba
::
to_lower_copy
(
id
.
substr
(
0
,
1
))
/
(
id
+
'_'
+
id
+
".cif"
);
if
(
not
fs
::
exists
(
resFile
))
mMissing
.
insert
(
id
);
else
{
mCompounds
.
push_back
(
new
Compound
(
resFile
.
string
(),
id
,
name
,
group
));
result
=
mCompounds
.
back
();
}
}
else
{
mCompounds
.
push_back
(
new
Compound
(
mPath
,
id
,
name
,
group
));
result
=
mCompounds
.
back
();
}
}
if
(
result
==
nullptr
and
mNext
!=
nullptr
)
result
=
mNext
->
create
(
id
);
}
return
result
;
}
const
Link
*
CompoundFactoryImpl
::
getLink
(
std
::
string
id
)
// if (not rs.empty()
)
{
//
{
std
::
shared_lock
lock
(
mMutex
);
// auto row = rs.front(
);
ba
::
to_upper
(
id
);
// std::string name, group;
// uint32_t numberAtomsAll, numberAtomsNh;
// cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
// row.get("name", "group", "number_atoms_all", "number_atoms_nh");
const
Link
*
result
=
nullptr
;
// ba::trim(name);
// ba::trim(group);
for
(
auto
link
:
mLinks
)
{
if
(
link
->
id
()
==
id
)
{
result
=
link
;
break
;
}
}
if
(
result
==
nullptr
and
mNext
!=
nullptr
)
result
=
mNext
->
getLink
(
id
);
return
result
;
}
const
Link
*
CompoundFactoryImpl
::
createLink
(
std
::
string
id
)
// if (mFile.get("comp_" + id) == nullptr
)
{
//
{
ba
::
to_upper
(
id
);
// auto clibd_mon = fs::path(getenv("CLIBD_MON")
);
const
Link
*
result
=
getLink
(
id
);
// fs::path resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif"
);
if
(
result
==
nullptr
)
// if (not fs::exists(resFile) and (id == "COM" or id == "CON" or "PRN")) // seriously...
{
// resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
std
::
unique_lock
lock
(
mMutex
);
// if (not fs::exists(resFile))
// mMissing.insert(id);
// else
// {
// mCompounds.push_back(new Compound(resFile.string(), id, name, group));
// result = mCompounds.back();
// }
// }
// else
// {
// mCompounds.push_back(new Compound(mPath, id, name, group));
// result = mCompounds.back();
// }
// }
// if (result == nullptr and mNext != nullptr)
// result = mNext->create(id);
// }
auto
db
=
mFile
.
get
(
"link_"
+
id
);
if
(
db
!=
nullptr
)
{
result
=
new
Link
(
*
db
);
mLinks
.
push_back
(
result
);
}
if
(
result
==
nullptr
and
mNext
!=
nullptr
)
result
=
mNext
->
createLink
(
id
);
}
return
result
;
return
result
;
}
}
// --------------------------------------------------------------------
// --------------------------------------------------------------------
CompoundFactory
*
CompoundFactory
::
sInstance
;
CompoundFactory
*
CompoundFactory
::
sInstance
;
thread_local
std
::
unique_ptr
<
CompoundFactory
>
CompoundFactory
::
tlInstance
;
thread_local
std
::
unique_ptr
<
CompoundFactory
>
CompoundFactory
::
tlInstance
;
bool
CompoundFactory
::
sUseThreadLocalInstance
;
bool
CompoundFactory
::
sUseThreadLocalInstance
;
...
@@ -1272,21 +463,6 @@ void CompoundFactory::init(bool useThreadLocalInstanceOnly)
...
@@ -1272,21 +463,6 @@ void CompoundFactory::init(bool useThreadLocalInstanceOnly)
CompoundFactory
::
CompoundFactory
()
CompoundFactory
::
CompoundFactory
()
:
mImpl
(
nullptr
)
:
mImpl
(
nullptr
)
{
{
const
char
*
clibdMon
=
getenv
(
"CLIBD_MON"
);
if
(
clibdMon
!=
nullptr
)
{
fs
::
path
db
=
fs
::
path
(
clibdMon
)
/
"list"
/
"mon_lib_list.cif"
;
if
(
fs
::
exists
(
db
))
pushDictionary
(
db
.
string
());
}
if
(
mImpl
==
nullptr
)
{
if
(
cif
::
VERBOSE
)
std
::
cerr
<<
"Could not load the mon_lib_list.cif file from CCP4, please make sure you have installed CCP4 and sourced the environment."
<<
std
::
endl
;
mImpl
=
new
CompoundFactoryImpl
();
}
}
}
CompoundFactory
::~
CompoundFactory
()
CompoundFactory
::~
CompoundFactory
()
...
@@ -1294,7 +470,7 @@ CompoundFactory::~CompoundFactory()
...
@@ -1294,7 +470,7 @@ CompoundFactory::~CompoundFactory()
delete
mImpl
;
delete
mImpl
;
}
}
CompoundFactory
&
CompoundFactory
::
instance
()
CompoundFactory
&
CompoundFactory
::
instance
()
{
{
if
(
sUseThreadLocalInstance
)
if
(
sUseThreadLocalInstance
)
{
{
...
@@ -1321,20 +497,20 @@ void CompoundFactory::clear()
...
@@ -1321,20 +497,20 @@ void CompoundFactory::clear()
}
}
}
}
void
CompoundFactory
::
pushDictionary
(
const
std
::
string
&
inDictFile
)
void
CompoundFactory
::
pushDictionary
(
const
std
::
string
&
inDictFile
)
{
{
if
(
not
fs
::
exists
(
inDictFile
))
if
(
not
fs
::
exists
(
inDictFile
))
throw
std
::
runtime_error
(
"file not found: "
+
inDictFile
);
throw
std
::
runtime_error
(
"file not found: "
+
inDictFile
);
// ifstream file(inDictFile);
// ifstream file(inDictFile);
// if (not file.is_open())
// if (not file.is_open())
// throw std::runtime_error("Could not open peptide list " + inDictFile);
// throw std::runtime_error("Could not open peptide list " + inDictFile);
try
try
{
{
mImpl
=
new
CompoundFactoryImpl
(
inDictFile
,
mImpl
);
mImpl
=
new
CompoundFactoryImpl
(
inDictFile
,
mImpl
);
}
}
catch
(
const
std
::
exception
&
ex
)
catch
(
const
std
::
exception
&
ex
)
{
{
std
::
cerr
<<
"Error loading dictionary "
<<
inDictFile
<<
std
::
endl
;
std
::
cerr
<<
"Error loading dictionary "
<<
inDictFile
<<
std
::
endl
;
throw
;
throw
;
...
@@ -1348,39 +524,32 @@ void CompoundFactory::popDictionary()
...
@@ -1348,39 +524,32 @@ void CompoundFactory::popDictionary()
}
}
// id is the three letter code
// id is the three letter code
const
Compound
*
CompoundFactory
::
get
(
std
::
string
id
)
const
Compound
*
CompoundFactory
::
get
(
std
::
string
id
)
{
{
return
mImpl
->
get
(
id
);
return
mImpl
->
get
(
id
);
}
}
const
Compound
*
CompoundFactory
::
create
(
std
::
string
id
)
const
Compound
*
CompoundFactory
::
create
(
std
::
string
id
)
{
{
return
mImpl
->
create
(
id
);
return
mImpl
->
create
(
id
);
}
}
const
Link
*
CompoundFactory
::
getLink
(
std
::
string
id
)
bool
CompoundFactory
::
isKnownPeptide
(
const
std
::
string
&
resName
)
const
{
return
mImpl
->
getLink
(
id
);
}
const
Link
*
CompoundFactory
::
createLink
(
std
::
string
id
)
{
return
mImpl
->
createLink
(
id
);
}
bool
CompoundFactory
::
isKnownPeptide
(
const
std
::
string
&
resName
)
const
{
{
return
mImpl
->
isKnownPeptide
(
resName
);
return
mImpl
->
isKnownPeptide
(
resName
);
}
}
bool
CompoundFactory
::
isKnownBase
(
const
std
::
string
&
resName
)
const
bool
CompoundFactory
::
isKnownBase
(
const
std
::
string
&
resName
)
const
{
{
return
mImpl
->
isKnownBase
(
resName
);
return
mImpl
->
isKnownBase
(
resName
);
}
}
std
::
string
CompoundFactory
::
unalias
(
const
std
::
string
&
resName
)
const
// --------------------------------------------------------------------
std
::
vector
<
std
::
string
>
Compound
::
addExtraComponents
(
const
std
::
filesystem
::
path
&
components
)
{
{
return
mImpl
->
unalias
(
resName
);
}
}
}
}
// namespace mmcif
src/PDB2Cif.cpp
View file @
915d6504
...
@@ -457,7 +457,6 @@ class PDBFileParser
...
@@ -457,7 +457,6 @@ class PDBFileParser
char
iCode
;
char
iCode
;
int
numHetAtoms
=
0
;
int
numHetAtoms
=
0
;
std
::
string
text
;
std
::
string
text
;
bool
sugar
=
false
;
std
::
string
asymID
;
std
::
string
asymID
;
std
::
vector
<
PDBRecord
*>
atoms
;
std
::
vector
<
PDBRecord
*>
atoms
;
bool
processed
=
false
;
bool
processed
=
false
;
...
@@ -467,14 +466,6 @@ class PDBFileParser
...
@@ -467,14 +466,6 @@ class PDBFileParser
HET
(
const
std
::
string
&
hetID
,
char
chainID
,
int
seqNum
,
char
iCode
,
int
numHetAtoms
=
0
,
const
std
::
string
&
text
=
{})
HET
(
const
std
::
string
&
hetID
,
char
chainID
,
int
seqNum
,
char
iCode
,
int
numHetAtoms
=
0
,
const
std
::
string
&
text
=
{})
:
hetID
(
hetID
),
chainID
(
chainID
),
seqNum
(
seqNum
),
iCode
(
iCode
),
numHetAtoms
(
numHetAtoms
),
text
(
text
)
:
hetID
(
hetID
),
chainID
(
chainID
),
seqNum
(
seqNum
),
iCode
(
iCode
),
numHetAtoms
(
numHetAtoms
),
text
(
text
)
{
{
// just in case we don't have a CCP4 available
if
(
hetID
==
"MAN"
or
hetID
==
"BMA"
or
hetID
==
"NAG"
or
hetID
==
"NDG"
or
hetID
==
"FUC"
or
hetID
==
"FUL"
)
sugar
=
true
;
else
{
auto
compound
=
CompoundFactory
::
instance
().
create
(
hetID
);
sugar
=
compound
?
compound
->
isSugar
()
:
false
;
}
}
}
};
};
...
@@ -4374,33 +4365,38 @@ void PDBFileParser::ConstructEntities()
...
@@ -4374,33 +4365,38 @@ void PDBFileParser::ConstructEntities()
mMod2parent
.
count
(
cc
)
?
mMod2parent
[
cc
]
:
cc
mMod2parent
.
count
(
cc
)
?
mMod2parent
[
cc
]
:
cc
);
);
std
::
string
formula
=
mFormuls
[
cc
];
std
::
string
name
;
if
(
formula
.
empty
()
and
compound
!=
nullptr
)
std
::
string
formula
;
formula
=
compound
->
formula
();
std
::
string
type
;
else
{
const
std
::
regex
rx
(
R"(\d+\((.+)\))"
);
std
::
smatch
m
;
if
(
std
::
regex_match
(
formula
,
m
,
rx
))
formula
=
m
[
1
].
str
();
}
std
::
string
name
=
mHetnams
[
cc
];
if
(
name
.
empty
()
and
compound
!=
nullptr
)
name
=
compound
->
name
();
std
::
string
type
=
"other"
;
std
::
string
nstd
=
"."
;
std
::
string
nstd
=
"."
;
std
::
string
formulaWeight
;
if
(
compound
!=
nullptr
)
if
(
compound
!=
nullptr
)
{
{
name
=
compound
->
name
();
type
=
compound
->
type
();
type
=
compound
->
type
();
if
(
type
.
empty
())
type
=
"NON-POLYMER"
;
if
(
iequals
(
type
,
"L-peptide linking"
)
or
iequals
(
type
,
"peptide linking"
))
if
(
iequals
(
type
,
"L-peptide linking"
)
or
iequals
(
type
,
"peptide linking"
))
nstd
=
"y"
;
nstd
=
"y"
;
formula
=
compound
->
formula
();
formulaWeight
=
std
::
to_string
(
compound
->
formulaWeight
());
}
if
(
name
.
empty
())
name
=
mHetnams
[
cc
];
if
(
type
.
empty
())
type
=
"NON-POLYMER"
;
if
(
formula
.
empty
())
{
formula
=
mFormuls
[
cc
];
const
std
::
regex
rx
(
R"(\d+\((.+)\))"
);
std
::
smatch
m
;
if
(
std
::
regex_match
(
formula
,
m
,
rx
))
formula
=
m
[
1
].
str
();
}
}
if
(
modResSet
.
count
(
cc
))
if
(
modResSet
.
count
(
cc
))
...
@@ -4410,6 +4406,7 @@ void PDBFileParser::ConstructEntities()
...
@@ -4410,6 +4406,7 @@ void PDBFileParser::ConstructEntities()
{
"id"
,
cc
},
{
"id"
,
cc
},
{
"name"
,
name
},
{
"name"
,
name
},
{
"formula"
,
formula
},
{
"formula"
,
formula
},
{
"formula_weight"
,
formulaWeight
},
{
"mon_nstd_flag"
,
nstd
},
{
"mon_nstd_flag"
,
nstd
},
{
"type"
,
type
}
{
"type"
,
type
}
});
});
...
...
src/Structure.cpp
View file @
915d6504
...
@@ -566,16 +566,6 @@ int Atom::charge() const
...
@@ -566,16 +566,6 @@ int Atom::charge() const
return
property
<
int
>
(
"pdbx_formal_charge"
);
return
property
<
int
>
(
"pdbx_formal_charge"
);
}
}
std
::
string
Atom
::
energyType
()
const
{
std
::
string
result
;
if
(
impl
()
and
impl
()
->
mCompound
)
result
=
impl
()
->
mCompound
->
getAtomByID
(
impl
()
->
mAtomID
).
typeEnergy
;
return
result
;
}
float
Atom
::
uIso
()
const
float
Atom
::
uIso
()
const
{
{
float
result
;
float
result
;
...
@@ -1013,21 +1003,6 @@ bool Residue::isEntity() const
...
@@ -1013,21 +1003,6 @@ bool Residue::isEntity() const
return
a1
.
size
()
==
a2
.
size
();
return
a1
.
size
()
==
a2
.
size
();
}
}
bool
Residue
::
isSugar
()
const
{
return
compound
().
isSugar
();
}
bool
Residue
::
isPyranose
()
const
{
return
cif
::
iequals
(
compound
().
group
(),
"pyranose"
);
}
bool
Residue
::
isFuranose
()
const
{
return
cif
::
iequals
(
compound
().
group
(),
"furanose"
);
}
std
::
string
Residue
::
authID
()
const
std
::
string
Residue
::
authID
()
const
{
{
std
::
string
result
;
std
::
string
result
;
...
@@ -2101,7 +2076,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
...
@@ -2101,7 +2076,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
{
{
auto
compound
=
Compound
::
create
(
compoundID
);
auto
compound
=
Compound
::
create
(
compoundID
);
if
(
compound
==
nullptr
)
if
(
compound
==
nullptr
)
throw
std
::
runtime_error
(
"Trying to insert unknown compound "
+
compoundID
+
" (not found in CC
P4 monomers lib
)"
);
throw
std
::
runtime_error
(
"Trying to insert unknown compound "
+
compoundID
+
" (not found in CC
D
)"
);
cif
::
Datablock
&
db
=
*
mFile
.
impl
().
mDb
;
cif
::
Datablock
&
db
=
*
mFile
.
impl
().
mDb
;
...
@@ -2113,6 +2088,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
...
@@ -2113,6 +2088,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
{
"id"
,
compoundID
},
{
"id"
,
compoundID
},
{
"name"
,
compound
->
name
()
},
{
"name"
,
compound
->
name
()
},
{
"formula"
,
compound
->
formula
()
},
{
"formula"
,
compound
->
formula
()
},
{
"formula_weight"
,
compound
->
formulaWeight
()
},
{
"type"
,
compound
->
type
()
}
{
"type"
,
compound
->
type
()
}
});
});
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment