Commit 4206f266 by maarten

Remove orphans

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@409 a1961a4f-ab94-4bcc-80e8-33b5a54de466
parent dd7a4f11
......@@ -237,11 +237,9 @@ class Datablock
namespace detail
{
// ItemReference is a helper class
struct ItemReference
class ItemReference
{
const char* mName;
size_t mColumn;
ItemRow* mRow;
public:
template<typename T>
ItemReference& operator=(const T& value)
......@@ -293,6 +291,20 @@ namespace detail
bool operator!=(const string& s) const { return s != c_str(); }
bool operator==(const string& s) const { return s == c_str(); }
private:
friend class ::cif::Row;
ItemReference(const char* name, size_t column, Row& row)
: mName(name), mColumn(column), mRow(row) {}
ItemReference(const char* name, size_t column, const Row& row)
: mName(name), mColumn(column), mRow(const_cast<Row&>(row)), mConst(true) {}
const char* mName;
size_t mColumn;
Row& mRow;
bool mConst = false;
};
template<>
......@@ -514,10 +526,28 @@ class Row
friend class RowComparator;
friend struct detail::ItemReference;
Row(ItemRow* data = nullptr) : mData(data) {}
Row(ItemRow* data = nullptr, bool cascadeUpdate = true)
: mData(data), mCascadeUpdate(cascadeUpdate) {}
Row(const ItemRow* data)
: Row(const_cast<ItemRow*>(data), false)
{}
Row(const Row& rhs);
Row& operator=(const Row& rhs);
/// When updating a value, you might want to change linked records as well.
/// But not always: this setter stores the choice in mCascadeUpdate, the flag
/// Row::assign tests before it looks at child categories that depend on the
/// value being changed.
void setCascadeUpdate(bool cascadeUpdate)
{
mCascadeUpdate = cascadeUpdate;
}
/// Stores the given value in this row's mCascadeDelete flag.
/// NOTE(review): the method name is missing the final 'e' of "Delete"; it is
/// kept as-is because renaming would break callers. Presumably the flag is
/// meant to control whether erasing a row also erases linked rows — confirm
/// against the code that reads mCascadeDelete.
void setCascadeDelet(bool cascadeDelete)
{
mCascadeDelete = cascadeDelete;
}
struct const_iterator : public std::iterator<std::forward_iterator_tag, const Item>
{
typedef std::iterator<std::forward_iterator_tag, Item> baseType;
......@@ -557,33 +587,35 @@ class Row
// TODO: implement real const version?
friend class detail::ItemReference;
const detail::ItemReference operator[](size_t column) const
{
return detail::ItemReference{"<anonymous column>", column, mData};
return detail::ItemReference("<anonymous column>", column, *this);
}
const detail::ItemReference operator[](const char* itemTag) const
{
size_t column = ColumnForItemTag(itemTag);
return detail::ItemReference{itemTag, column, mData};
return detail::ItemReference(itemTag, column, *this);
}
detail::ItemReference operator[](const char* itemTag)
{
size_t column = ColumnForItemTag(itemTag);
return detail::ItemReference{itemTag, column, mData};
return detail::ItemReference(itemTag, column, *this);
}
const detail::ItemReference operator[](const string& itemTag) const
{
size_t column = ColumnForItemTag(itemTag.c_str());
return detail::ItemReference{itemTag.c_str(), column, mData};
return detail::ItemReference(itemTag.c_str(), column, *this);
}
detail::ItemReference operator[](const string& itemTag)
{
size_t column = ColumnForItemTag(itemTag.c_str());
return detail::ItemReference{itemTag.c_str(), column, mData};
return detail::ItemReference(itemTag.c_str(), column, *this);
}
template<typename... C>
......@@ -612,6 +644,8 @@ class Row
private:
friend std::ostream& operator<<(std::ostream& os, const Row& row);
void assign(const string& name, const string& value, bool emplacing);
void assign(size_t column, const string& value, bool emplacing);
void assign(const Item& i, bool emplacing);
......@@ -622,10 +656,14 @@ class Row
ItemRow* mData;
uint32 mLineNr = 0;
bool mCascadeUpdate = true;
bool mCascadeDelete = true;
};
// swap for Rows is defined below
std::ostream& operator<<(std::ostream& os, const Row& row);
// --------------------------------------------------------------------
// some more templates to be able to do querying
......@@ -651,7 +689,7 @@ struct AllConditionImpl : public ConditionImpl
struct Condition
{
Condition() : mImpl(new detail::AllConditionImpl()) {}
Condition() : mImpl(nullptr) {}
Condition(detail::ConditionImpl* impl) : mImpl(impl) {}
Condition(Condition&& rhs)
......@@ -673,7 +711,8 @@ struct Condition
void prepare(const Category& c)
{
mImpl->prepare(c);
if (mImpl)
mImpl->prepare(c);
mPrepared = true;
}
......@@ -681,12 +720,12 @@ struct Condition
{
assert(mImpl);
assert(mPrepared);
return mImpl->test(c, r);
return mImpl ? mImpl->test(c, r) : false;
}
std::string str() const
{
return mImpl->str();
return mImpl ? mImpl->str() : "";
}
detail::ConditionImpl* mImpl;
......@@ -919,12 +958,20 @@ struct orConditionImpl : public ConditionImpl
inline Condition operator&&(Condition&& a, Condition&& b)
{
return Condition(new detail::andConditionImpl(std::move(a), std::move(b)));
if (a.mImpl and b.mImpl)
return Condition(new detail::andConditionImpl(std::move(a), std::move(b)));
if (a.mImpl)
return Condition(std::move(a));
return Condition(std::move(b));
}
inline Condition operator||(Condition&& a, Condition&& b)
{
return Condition(new detail::orConditionImpl(std::move(a), std::move(b)));
if (a.mImpl and b.mImpl)
return Condition(new detail::orConditionImpl(std::move(a), std::move(b)));
if (a.mImpl)
return Condition(std::move(a));
return Condition(std::move(b));
}
inline
......@@ -1073,8 +1120,6 @@ class Category
const string name() const { return mName; }
const detail::ItemReference getFirstItem(const char* ItemName) const;
struct iterator : public std::iterator<std::forward_iterator_tag, Row>
{
friend class Category;
......@@ -1101,6 +1146,32 @@ class Category
iterator begin();
iterator end();
struct const_iterator : public std::iterator<std::forward_iterator_tag, const Row>
{
friend class Category;
typedef std::iterator<std::forward_iterator_tag, const Row> baseType;
typedef typename baseType::pointer pointer;
typedef typename baseType::reference reference;
const_iterator(const ItemRow* data) : mCurrent(data) {}
reference operator*() { return mCurrent; }
pointer operator->() { return &mCurrent; }
const_iterator& operator++();
const_iterator operator++(int) { const_iterator result(*this); this->operator++(); return result; }
bool operator==(const const_iterator& rhs) const { return mCurrent == rhs.mCurrent; }
bool operator!=(const const_iterator& rhs) const { return not (mCurrent == rhs.mCurrent); }
private:
const Row mCurrent;
};
const_iterator begin() const;
const_iterator end() const;
bool empty() const;
size_t size() const;
......@@ -1132,6 +1203,12 @@ class Category
void erase(Row r);
void erase(iterator ri);
void eraseOrphans(Condition&& cond);
/// an orphan is a row that is the child side of one or more
/// links and for which there is no single parent left.
bool isOrphan(Row r);
bool isValid();
const Validator& getValidator() const;
......@@ -1157,6 +1234,7 @@ class Category
vector<string> getColumnNames() const;
void reorderByIndex();
void sort(std::function<int(const Row&, const Row&)> comparator);
private:
......
......@@ -141,7 +141,12 @@ class Atom
std::swap(mImpl, b.mImpl);
}
int compare(const Atom& b) const;
private:
friend class Structure;
void setID(int id);
struct AtomImpl* mImpl;
};
......@@ -391,6 +396,10 @@ class Structure
void moveAtom(Atom& a, Point p); // move atom to a new location
void changeResidue(const Residue& res, const std::string& newCompound,
const std::vector<std::tuple<std::string,std::string>>& remappedAtoms);
/// To sort the atoms in order of model > asym-id > res-id > atom-id
/// Will assign new atom_id's to all atoms. Be careful
void sortAtoms();
// iterator for all residues
......
......@@ -212,7 +212,10 @@ namespace detail
template<>
ItemReference& ItemReference::operator=(const string& value)
{
Row(mRow).assign(mName, value, false);
if (mConst)
throw logic_error("Attempt to write to a constant row");
mRow.assign(mName, value, false);
return *this;
}
......@@ -220,11 +223,11 @@ const char* ItemReference::c_str() const
{
const char* result = kEmptyResult;
if (mRow != nullptr /* and mRow->mCategory != nullptr*/)
if (mRow.mData != nullptr /* and mRow.mData->mCategory != nullptr*/)
{
// assert(mRow->mCategory);
// assert(mRow.mData->mCategory);
for (auto iv = mRow->mValues; iv != nullptr; iv = iv->mNext)
for (auto iv = mRow.mData->mValues; iv != nullptr; iv = iv->mNext)
{
if (iv->mColumnIndex == mColumn)
{
......@@ -243,9 +246,9 @@ const char* ItemReference::c_str(const char* defaultValue) const
{
const char* result = defaultValue;
if (mRow != nullptr and mRow->mCategory != nullptr)
if (mRow.mData != nullptr and mRow.mData->mCategory != nullptr)
{
for (auto iv = mRow->mValues; iv != nullptr; iv = iv->mNext)
for (auto iv = mRow.mData->mValues; iv != nullptr; iv = iv->mNext)
{
if (iv->mColumnIndex == mColumn)
{
......@@ -257,9 +260,9 @@ const char* ItemReference::c_str(const char* defaultValue) const
}
}
if (result == defaultValue and mColumn < mRow->mCategory->mColumns.size()) // not found, perhaps the category has a default defined?
if (result == defaultValue and mColumn < mRow.mData->mCategory->mColumns.size()) // not found, perhaps the category has a default defined?
{
auto iv = mRow->mCategory->mColumns[mColumn].mValidator;
auto iv = mRow.mData->mCategory->mColumns[mColumn].mValidator;
if (iv != nullptr and not iv->mDefault.empty())
result = iv->mDefault.c_str();
}
......@@ -275,7 +278,7 @@ bool ItemReference::empty() const
void ItemReference::swap(ItemReference& b)
{
Row::swap(mColumn, mRow, b.mRow);
Row::swap(mColumn, mRow.mData, b.mRow.mData);
}
}
......@@ -304,7 +307,12 @@ string Datablock::firstItem(const string& tag) const
{
if (iequals(cat.name(), catName))
{
result = cat.getFirstItem(itemName.c_str()).as<string>();
for (auto row: cat)
{
result = row[itemName].as<string>();
break;
}
break;
}
}
......@@ -1227,6 +1235,35 @@ void Category::reorderByIndex()
std::tie(mHead, mTail) = mIndex->reorder();
}
/// Reorder the rows of this category according to `comparator`.
///
/// `comparator(a, b)` is treated as a three-way comparison: a negative
/// result means row `a` is placed before row `b`. The sort is stable, so
/// rows for which the comparator never returns a negative value in either
/// direction keep their current relative order.
///
/// Only the linked list (mHead, mTail and each row's mNext) is rewritten.
/// An empty category is left untouched.
void Category::sort(std::function<int(const Row&, const Row&)> comparator)
{
	if (mHead == nullptr)
		return;

	// Flatten the singly linked list so a standard algorithm can sort it.
	vector<ItemRow*> rows;
	for (auto itemRow = mHead; itemRow != nullptr; itemRow = itemRow->mNext)
		rows.push_back(itemRow);

	// Only the comparator is needed inside the predicate.
	std::stable_sort(rows.begin(), rows.end(),
		[&comparator](ItemRow* ia, ItemRow* ib)
		{
			Row ra(ia);
			Row rb(ib);
			return comparator(ra, rb) < 0;
		});

	// Thread the list back together in sorted order.
	mHead = rows.front();
	mTail = rows.back();

	for (size_t i = 0; i + 1 < rows.size(); ++i)
		rows[i]->mNext = rows[i + 1];
	mTail->mNext = nullptr;

	assert(size() == rows.size());
}
size_t Category::size() const
{
size_t result = 0;
......@@ -1433,6 +1470,29 @@ void Category::erase(Condition&& cond)
erase(r);
}
/// Erase every row of this category that matches `cond` and for which
/// isOrphan() returns true.
///
/// Matching rows are first collected and only erased afterwards, so the
/// category is not modified while it is being iterated. When VERBOSE is
/// greater than 1 each row that is about to be removed is written to cerr.
void Category::eraseOrphans(Condition&& cond)
{
	RowSet orphans(*this);

	cond.prepare(*this);

	for (auto row: *this)
	{
		if (not cond(*this, row))
			continue;

		if (not isOrphan(row))
			continue;

		if (VERBOSE > 1)
		{
			cerr << "Removing orphaned record: " << endl
				 << row << endl
				 << endl;
		}

		orphans.push_back(row);
	}

	for (auto row: orphans)
		erase(row);
}
void Category::erase(iterator p)
{
erase(*p);
......@@ -1444,6 +1504,30 @@ void Category::erase(Row r)
if (mCatValidator)
keys = iset(mCatValidator->mKeys.begin(), mCatValidator->mKeys.end());
if (mHead == nullptr)
throw runtime_error("erase");
if (mIndex != nullptr)
mIndex->erase(r.mData);
if (r == mHead)
{
mHead = mHead->mNext;
r.mData->mNext = nullptr;
}
else
{
for (auto pi = mHead; pi != nullptr; pi = pi->mNext)
{
if (pi->mNext == r.mData)
{
pi->mNext = r.mData->mNext;
r.mData->mNext = nullptr;
break;
}
}
}
// links are created based on the _pdbx_item_linked_group_list entries
// in mmcif_pdbx.dic dictionary.
//
......@@ -1467,34 +1551,10 @@ void Category::erase(Row r)
cond = move(cond) && (Key(link->mChildKeys[ix]) == value);
}
childCat->erase(move(cond));
childCat->eraseOrphans(move(cond));
}
if (mHead == nullptr)
throw runtime_error("erase");
if (mIndex != nullptr)
mIndex->erase(r.mData);
if (r == mHead)
{
mHead = mHead->mNext;
r.mData->mNext = nullptr;
delete r.mData;
}
else
{
for (auto pi = mHead; pi != nullptr; pi = pi->mNext)
{
if (pi->mNext == r.mData)
{
pi->mNext = r.mData->mNext;
r.mData->mNext = nullptr;
delete r.mData;
break;
}
}
}
delete r.mData;
// reset mTail, if needed
if (r == mTail)
......@@ -1512,12 +1572,6 @@ void Category::getTagOrder(vector<string>& tags) const
tags.push_back("_" + mName + "." + c.mName);
}
const detail::ItemReference Category::getFirstItem(const char* itemName) const
{
size_t column = getColumnIndex(itemName);
return detail::ItemReference{itemName, column, mHead};
}
Category::iterator Category::begin()
{
return iterator(mHead);
......@@ -1528,6 +1582,46 @@ Category::iterator Category::end()
return iterator(nullptr);
}
// Returns a read-only iterator wrapping mHead, the first row of this category.
Category::const_iterator Category::begin() const
{
return const_iterator(mHead);
}
// Returns the past-the-end read-only iterator, which wraps a null row pointer.
Category::const_iterator Category::end() const
{
return const_iterator(nullptr);
}
/// Tell whether row `r` has lost all of its parents.
///
/// For every link in which this category is the child side, a condition is
/// built that matches each parent key against the value of the
/// corresponding child key in `r`. As soon as one parent category holds a
/// row satisfying that condition, `r` is not an orphan. Links whose parent
/// category is not present in the datablock are skipped.
///
/// Returns false when no category validator is set, and true when no
/// matching parent row was found in any inspected link.
bool Category::isOrphan(Row r)
{
	// be safe
	if (mCatValidator == nullptr)
		return false;

	for (auto& link: mValidator->getLinksForChild(mName))
	{
		auto parentCat = mDb.get(link->mParentCategory);
		if (parentCat == nullptr)
			continue;

		// AND together one "parent key == child value" test per key pair
		Condition parentMatch;
		const size_t keyCount = link->mChildKeys.size();
		for (size_t k = 0; k < keyCount; ++k)
		{
			const char* childValue = r[link->mChildKeys[k]].c_str();
			parentMatch = move(parentMatch) && (Key(link->mParentKeys[k]) == childValue);
		}

		if (parentCat->exists(std::move(parentMatch)))
			return false;
	}

	return true;
}
bool Category::isValid()
{
bool result = true;
......@@ -1931,12 +2025,14 @@ void Category::write(ostream& os, const vector<string>& columns)
/// Copy constructor: the new Row refers to the same ItemRow as `rhs` and
/// inherits both of its cascade flags, so a copy behaves like the original
/// for updates as well as for deletes.
/// NOTE(review): mLineNr is not copied and keeps its default value, as
/// before — confirm that this is intended.
Row::Row(const Row& rhs)
	: mData(rhs.mData)
	, mCascadeUpdate(rhs.mCascadeUpdate)
	, mCascadeDelete(rhs.mCascadeDelete)
{
}
/// Copy assignment: makes this Row refer to the same ItemRow as `rhs` and
/// take over both of its cascade flags.
/// NOTE(review): mLineNr is left unchanged, as before — confirm that this
/// is intended.
Row& Row::operator=(const Row& rhs)
{
	mData = rhs.mData;
	mCascadeUpdate = rhs.mCascadeUpdate;
	mCascadeDelete = rhs.mCascadeDelete;
	return *this;
}
......@@ -2043,7 +2139,7 @@ void Row::assign(size_t column, const string& value, bool emplacing)
// see if we need to update any child categories that depend on this value
auto iv = col.mValidator;
if (not emplacing and iv != nullptr)
if (not emplacing and iv != nullptr and mCascadeUpdate)
{
auto& validator = cat->getValidator();
auto& db = cat->db();
......@@ -2284,6 +2380,19 @@ void Row::const_iterator::fetch()
mPtr->mText);
}
/// Write all items of `row` to `os`, one per line, in the form
/// `_<category name>.<column name> <text>`.
///
/// A Row can wrap a null ItemRow pointer, and an ItemRow can lack a
/// category; in both cases there is nothing that can be named, so nothing
/// is written and the stream is returned unchanged.
std::ostream& operator<<(std::ostream& os, const Row& row)
{
	if (row.mData == nullptr or row.mData->mCategory == nullptr)
		return os;

	auto category = row.mData->mCategory;
	string catName = category->name();

	for (auto item = row.mData->mValues; item != nullptr; item = item->mNext)
	{
		string tagName = category->getColumnName(item->mColumnIndex);
		os << '_' << catName << '.' << tagName << ' ' << item->mText << endl;
	}

	return os;
}
// --------------------------------------------------------------------
File::File()
......
......@@ -308,13 +308,18 @@ vector<const ValidateLink*> Validator::getLinksForParent(const string& category)
return result;
}
//const ValidateLink* Validator::getLinksForChild(const string& category) const
//{
// auto i = find_if(mLinkValidators.begin(), mLinkValidators.end(),
// [&](auto& l) { return l.mChildCategory == category; });
//
// return i == mLinkValidators.end() ? nullptr : &(*i);
//}
/// Collect pointers to all link validators whose child category equals
/// `category`, in the order in which they are stored in mLinkValidators.
/// The result is empty when no link has `category` as its child side.
vector<const ValidateLink*> Validator::getLinksForChild(const string& category) const
{
	vector<const ValidateLink*> links;

	for (const auto& candidate: mLinkValidators)
	{
		if (not (candidate.mChildCategory == category))
			continue;

		links.push_back(&candidate);
	}

	return links;
}
void Validator::reportError(const string& msg, bool fatal)
{
......
......@@ -342,7 +342,7 @@ Compound::Compound(const fs::path& file, const std::string& id,
else if (iequals(type, "double") or iequals(type, "doub")) b.type = doubleBond;
else if (iequals(type, "triple") or iequals(type, "trip")) b.type = tripleBond;
else if (iequals(type, "deloc") or iequals(type, "aromat") or iequals(type, "aromatic"))
b.type = delocalizedBond;
b.type = delocalizedBond;
else
{
if (VERBOSE)
......
......@@ -349,7 +349,17 @@ struct AtomImpl
else
return i->second;
}
/// Three-way comparison with another atom implementation.
///
/// Atoms are ordered by mAsymID first, then by mSeqID and finally by
/// mAtomID. Returns a negative value when this atom sorts before `b`,
/// zero when all three fields are equal and a positive value otherwise.
/// Only the sign of the result is meaningful.
int compare(const AtomImpl& b) const
{
	int d = mAsymID.compare(b.mAsymID);

	// Compare instead of subtracting: a difference of two signed values
	// can overflow, a comparison cannot.
	if (d == 0 and mSeqID != b.mSeqID)
		d = mSeqID < b.mSeqID ? -1 : 1;

	if (d == 0)
		d = mAtomID.compare(b.mAtomID);

	return d;
}
const File& mFile;
string mId;
AtomType mType;
......@@ -586,6 +596,16 @@ float Atom::radius() const
return mImpl->mRadius;
}
/// Three-way comparison of two atoms.
/// Atoms sharing the same implementation object compare equal; otherwise
/// the result is whatever the implementation's compare() returns.
int Atom::compare(const Atom& b) const
{
	if (mImpl == b.mImpl)
		return 0;

	return mImpl->compare(*b.mImpl);
}
// Stores the decimal string form of `id` in the implementation's mId field.
// mImpl is dereferenced without a null check.
void Atom::setID(int id)
{
mImpl->mId = to_string(id);
}
// --------------------------------------------------------------------
// residue
......@@ -1321,6 +1341,20 @@ void Structure::updateAtomIndex()
sort(mAtomIndex.begin(), mAtomIndex.end(), [this](size_t a, size_t b) { return mAtoms[a].id() < mAtoms[b].id(); });
}
/// Sort mAtoms with Atom::compare, then renumber them.
/// After sorting, the atoms receive the IDs 1, 2, 3, ... in their new
/// order and the atom index is rebuilt.
void Structure::sortAtoms()
{
	sort(mAtoms.begin(), mAtoms.end(),
		[](auto& lhs, auto& rhs)
		{
			return lhs.compare(rhs) < 0;
		});

	// hand out consecutive IDs, starting at 1
	int lastID = 0;
	for (auto& atom: mAtoms)
		atom.setID(++lastID);

	updateAtomIndex();
}
AtomView Structure::waters() const
{
AtomView result;
......@@ -1670,11 +1704,11 @@ void Structure::swapAtoms(Atom& a1, Atom& a2)
auto l1 = r1.front()["label_atom_id"];
auto l2 = r2.front()["label_atom_id"];
std::swap(l1, l2);
l1.swap(l2);
auto l3 = r1.front()["auth_atom_id"];
auto l4 = r2.front()["auth_atom_id"];
std::swap(l3, l4);
l3.swap(l4);
}
void Structure::moveAtom(Atom& a, Point p)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment