Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
libcifpp
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
libcifpp
Commits
24fa80ba
Unverified
Commit
24fa80ba
authored
Aug 02, 2022
by
Maarten L. Hekkelman
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
parser just started working again, a bit
parent
3999d792
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
2891 additions
and
102 deletions
+2891
-102
include/cif++/v2/category.hpp
+173
-5
include/cif++/v2/datablock.hpp
+70
-26
include/cif++/v2/file.hpp
+102
-5
include/cif++/v2/item.hpp
+34
-26
include/cif++/v2/iterator.hpp
+4
-2
include/cif++/v2/parser.hpp
+1054
-0
include/cif++/v2/row.hpp
+87
-9
src/parser.cpp
+1327
-0
test/unit-v2-test.cpp
+40
-29
No files found.
include/cif++/v2/category.hpp
View file @
24fa80ba
...
...
@@ -432,22 +432,190 @@ class category_t
if
(
result
==
m_columns
.
size
())
{
const
ValidateItem
*
item
V
alidator
=
nullptr
;
const
ValidateItem
*
item
_v
alidator
=
nullptr
;
// if (mCatValidator != nullptr)
// {
// item
V
alidator = mCatValidator->getValidatorForItem(column_name);
// if (item
V
alidator == nullptr)
// m
V
alidator->reportError("tag " + std::string(column_name) + " not allowed in Category " + mName, false);
// item
_v
alidator = mCatValidator->getValidatorForItem(column_name);
// if (item
_v
alidator == nullptr)
// m
_v
alidator->reportError("tag " + std::string(column_name) + " not allowed in Category " + mName, false);
// }
m_columns
.
emplace_back
(
column_name
,
item
V
alidator
);
m_columns
.
emplace_back
(
column_name
,
item
_v
alidator
);
}
return
result
;
}
private
:
void
update_value
(
row
*
row
,
size_t
column
,
std
::
string_view
value
,
bool
updateLinked
,
bool
validate
=
true
)
{
auto
&
col
=
m_columns
[
column
];
const
char
*
oldValue
=
nullptr
;
for
(
auto
iv
=
row
->
m_head
;
iv
!=
nullptr
;
iv
=
iv
->
m_next
)
{
assert
(
iv
!=
iv
->
m_next
and
(
iv
->
m_next
==
nullptr
or
iv
!=
iv
->
m_next
->
m_next
));
if
(
iv
->
m_column_ix
==
column
)
{
oldValue
=
iv
->
c_str
();
break
;
}
}
if
(
oldValue
!=
nullptr
and
value
==
oldValue
)
// no need to update
return
;
std
::
string
oldStrValue
=
oldValue
?
oldValue
:
""
;
// // check the value
// if (col.m_validator and validate)
// (*col.m_validator)(value);
// If the field is part of the Key for this Category, remove it from the index
// before updating
bool
reinsert
=
false
;
// if (updateLinked and // an update of an Item's value
// cat->mIndex != nullptr and cat->keyFieldsByIndex().count(column))
// {
// reinsert = cat->mIndex->find(mData);
// if (reinsert)
// cat->mIndex->erase(mData);
// }
// first remove old value with cix
if
(
row
->
m_head
==
nullptr
)
;
// nothing to do
else
if
(
row
->
m_head
->
m_column_ix
==
column
)
{
auto
iv
=
row
->
m_head
;
row
->
m_head
=
iv
->
m_next
;
iv
->
m_next
=
nullptr
;
delete_item
(
iv
);
}
else
{
for
(
auto
iv
=
row
->
m_head
;
iv
->
m_next
!=
nullptr
;
iv
=
iv
->
m_next
)
{
if
(
iv
->
m_next
->
m_column_ix
!=
column
)
continue
;
auto
nv
=
iv
->
m_next
;
iv
->
m_next
=
nv
->
m_next
;
nv
->
m_next
=
nullptr
;
delete_item
(
nv
);
break
;
}
}
if
(
not
value
.
empty
())
{
auto
nv
=
create_item
(
column
,
value
);
if
(
row
->
m_head
==
nullptr
)
row
->
m_head
=
nv
;
else
{
auto
iv
=
row
->
m_head
;
while
(
iv
->
m_next
!=
nullptr
)
iv
=
iv
->
m_next
;
iv
->
m_next
=
nv
;
}
}
// if (reinsert)
// cat->mIndex->insert(mData);
// // see if we need to update any child categories that depend on this value
// auto iv = col.m_validator;
// if (not skipUpdateLinked and iv != nullptr and mCascade)
// {
// for (auto &&[childCat, linked] : cat->mChildLinks)
// {
// if (find(linked->mParentKeys.begin(), linked->mParentKeys.end(), iv->mTag) == linked->mParentKeys.end())
// continue;
// Condition cond;
// std::string childTag;
// for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
// {
// std::string pk = linked->mParentKeys[ix];
// std::string ck = linked->mChildKeys[ix];
// // TODO add code to *NOT* test mandatory fields for Empty
// if (pk == iv->mTag)
// {
// childTag = ck;
// cond = std::move(cond) && Key(ck) == oldStrValue;
// }
// else
// {
// const char *pk_value = (*this)[pk].c_str();
// if (*pk_value == 0)
// cond = std::move(cond) && Key(ck) == Empty();
// else
// cond = std::move(cond) && ((Key(ck) == pk_value) or Key(ck) == Empty());
// }
// }
// auto rows = childCat->find(std::move(cond));
// if (rows.empty())
// continue;
// // if (cif::VERBOSE > 2)
// // {
// // std::cerr << "Parent: " << linked->mParentCategory << " Child: " << linked->mChildCategory << std::endl
// // << cond << std::endl;
// // }
// // Now, suppose there are already rows in child that conform to the new value,
// // we then skip this renam
// Condition cond_n;
// for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
// {
// std::string pk = linked->mParentKeys[ix];
// std::string ck = linked->mChildKeys[ix];
// // TODO add code to *NOT* test mandatory fields for Empty
// if (pk == iv->mTag)
// cond_n = std::move(cond_n) && Key(ck) == value;
// else
// {
// const char *pk_value = (*this)[pk].c_str();
// if (*pk_value == 0)
// cond_n = std::move(cond_n) && Key(ck) == Empty();
// else
// cond_n = std::move(cond_n) && ((Key(ck) == pk_value) or Key(ck) == Empty());
// }
// }
// auto rows_n = childCat->find(std::move(cond_n));
// if (not rows_n.empty())
// {
// if (cif::VERBOSE > 0)
// std::cerr << "Will not rename in child category since there are already rows that link to the parent" << std::endl;
// continue;
// }
// for (auto &cr : rows)
// cr.assign(childTag, value, false);
// }
// }
}
private
:
using
char_allocator_type
=
typename
std
::
allocator_traits
<
Alloc
>::
template
rebind_alloc
<
char
>
;
using
char_allocator_traits
=
std
::
allocator_traits
<
char_allocator_type
>
;
...
...
include/cif++/v2/datablock.hpp
View file @
24fa80ba
...
...
@@ -34,17 +34,22 @@ namespace cif::v2
// --------------------------------------------------------------------
template
<
typename
Category
=
category
,
typename
Alloc
=
std
::
allocator
<
Category
>>
class
datablock_t
:
public
std
::
list
<
Category
,
Alloc
>
typename
Alloc
=
std
::
allocator
<
void
>
,
typename
Category
=
category_t
<
Alloc
>>
class
datablock_t
{
public
:
using
category_type
=
Category
;
using
base_type
=
std
::
list
<
category_type
,
Alloc
>
;
using
allocator_type
=
Alloc
;
datablock_t
(
const
std
::
string
&
name
,
const
allocator_type
&
alloc
=
allocator_type
())
:
base_type
(
alloc
)
using
category_allocator_type
=
typename
std
::
allocator_traits
<
Alloc
>::
template
rebind_alloc
<
category_type
>
;
using
category_type_list
=
std
::
list
<
category_type
,
category_allocator_type
>
;
using
iterator
=
category_type_list
::
iterator
;
using
const_iterator
=
category_type_list
::
const_iterator
;
datablock_t
(
std
::
string_view
name
,
const
allocator_type
&
alloc
=
allocator_type
())
:
m_categories
(
alloc
)
,
m_name
(
name
)
{
}
...
...
@@ -53,19 +58,19 @@ class datablock_t : public std::list<Category, Alloc>
datablock_t
(
datablock_t
&&
)
=
default
;
template
<
typename
Alloc2
>
datablock_t
(
const
datablock_t
&
db
,
const
Alloc2
&
a
)
:
base_type
(
db
,
a
)
,
m_name
(
db
.
m_name
)
{
}
//
template <typename Alloc2>
//
datablock_t(const datablock_t &db, const Alloc2 &a)
// : m_categories
(db, a)
//
, m_name(db.m_name)
//
{
//
}
template
<
typename
Alloc2
>
datablock_t
(
datablock_t
&&
db
,
const
Alloc2
&
a
)
:
base_type
(
std
::
move
(
db
),
a
)
,
m_name
(
db
.
m_name
)
{
}
//
template <typename Alloc2>
//
datablock_t(datablock_t &&db, const Alloc2 &a)
//
: base_type(std::move(db), a)
//
, m_name(db.m_name)
//
{
//
}
datablock_t
&
operator
=
(
const
datablock_t
&
)
=
default
;
datablock_t
&
operator
=
(
datablock_t
&&
)
=
default
;
...
...
@@ -78,19 +83,57 @@ class datablock_t : public std::list<Category, Alloc>
category_type
&
operator
[](
std
::
string_view
name
)
{
auto
i
=
std
::
find_if
(
this
->
begin
(),
this
->
end
(),
[
name
](
const
category_type
&
c
)
auto
i
=
std
::
find_if
(
m_categories
.
begin
(),
m_categories
.
end
(),
[
name
](
const
category_type
&
c
)
{
return
iequals
(
c
.
name
(),
name
);
});
if
(
i
==
this
->
end
())
i
=
this
->
emplace
(
name
);
return
*
i
;
if
(
i
!=
m_categories
.
end
())
return
*
i
;
m_categories
.
emplace_back
(
name
);
return
m_categories
.
back
();
}
const
category_type
&
operator
[](
std
::
string_view
name
)
const
{
static
const
category_type
s_empty
;
auto
i
=
std
::
find_if
(
this
->
begin
(),
this
->
end
(),
[
name
](
const
category_type
&
c
)
auto
i
=
std
::
find_if
(
m_categories
.
begin
(),
m_categories
.
end
(),
[
name
](
const
category_type
&
c
)
{
return
iequals
(
c
.
name
(),
name
);
});
return
i
==
this
->
end
()
?
s_empty
:
*
i
;
return
i
==
m_categories
.
end
()
?
s_empty
:
*
i
;
}
std
::
tuple
<
iterator
,
bool
>
emplace
(
std
::
string_view
name
)
{
bool
is_new
=
true
;
auto
i
=
m_categories
.
begin
();
while
(
i
!=
m_categories
.
end
())
{
if
(
iequals
(
name
,
i
->
name
()))
{
is_new
=
false
;
if
(
i
!=
m_categories
.
begin
())
{
auto
n
=
std
::
next
(
i
);
m_categories
.
splice
(
m_categories
.
begin
(),
m_categories
,
i
,
n
);
}
break
;
}
++
i
;
}
if
(
is_new
)
{
m_categories
.
emplace
(
m_categories
.
begin
(),
name
);
// m_categories.emplace(begin(), *this, std::string(name), mValidator);
// for (auto &cat : mCategories)
// cat.updateLinks();
}
return
std
::
make_tuple
(
m_categories
.
begin
(),
is_new
);
}
void
write
(
std
::
ostream
&
os
)
const
...
...
@@ -104,7 +147,7 @@ class datablock_t : public std::list<Category, Alloc>
// and if it exists, _AND_ we have a Validator, write out the
// audit_conform record.
for
(
auto
&
cat
:
*
thi
s
)
for
(
auto
&
cat
:
m_categorie
s
)
{
if
(
cat
.
name
()
!=
"entry"
)
continue
;
...
...
@@ -122,7 +165,7 @@ class datablock_t : public std::list<Category, Alloc>
break
;
}
for
(
auto
&
cat
:
*
thi
s
)
for
(
auto
&
cat
:
m_categorie
s
)
{
if
(
cat
.
name
()
!=
"entry"
and
cat
.
name
()
!=
"audit_conform"
)
cat
.
write
(
os
);
...
...
@@ -136,6 +179,7 @@ class datablock_t : public std::list<Category, Alloc>
}
private
:
category_type_list
m_categories
;
std
::
string
m_name
;
};
...
...
include/cif++/v2/file.hpp
View file @
24fa80ba
...
...
@@ -27,6 +27,7 @@
#pragma once
#include "datablock.hpp"
#include "parser.hpp"
namespace
cif
::
v2
{
...
...
@@ -34,25 +35,121 @@ namespace cif::v2
// --------------------------------------------------------------------
template
<
typename
Datablock
=
datablock
,
typename
Alloc
=
std
::
allocator
<
Datablock
>>
class
file_t
:
public
std
::
list
<
Datablock
,
Alloc
>
typename
Alloc
=
std
::
allocator
<
void
>
,
typename
Datablock
=
datablock_t
<
Alloc
>
,
typename
Category
=
typename
Datablock
::
category_type
>
class
file_t
{
public
:
using
value_type
=
Datablock
;
using
base_type
=
std
::
list
<
value_type
,
Alloc
>
;
using
allocator_type
=
Alloc
;
using
datablock_type
=
Datablock
;
using
category_type
=
typename
datablock_type
::
category_type
;
using
datablock_allocator_type
=
typename
std
::
allocator_traits
<
Alloc
>::
template
rebind_alloc
<
datablock_type
>
;
using
datablock_list
=
std
::
list
<
datablock_type
,
datablock_allocator_type
>
;
using
value_type
=
datablock_list
::
value_type
;
using
reference
=
datablock_list
::
reference
;
using
pointer
=
datablock_list
::
pointer
;
using
iterator
=
datablock_list
::
iterator
;
using
const_iterator
=
datablock_list
::
const_iterator
;
using
parser_type
=
parser_t
<
file_t
,
datablock_type
,
category_type
>
;
file_t
()
=
default
;
file_t
(
const
allocator_type
&
a
=
allocator_type
())
:
m_datablocks
(
a
)
{
}
file_t
(
std
::
istream
&
is
,
const
allocator_type
&
alloc
=
allocator_type
())
:
m_datablocks
(
alloc
)
{
load
(
is
);
}
file_t
(
const
file_t
&
)
=
default
;
file_t
(
file_t
&&
)
=
default
;
file_t
&
operator
=
(
const
file_t
&
)
=
default
;
file_t
&
operator
=
(
file_t
&&
)
=
default
;
datablock_type
&
operator
[](
std
::
string_view
name
)
{
auto
i
=
std
::
find_if
(
m_datablocks
.
begin
(),
m_datablocks
.
end
(),
[
name
](
const
datablock_type
&
c
)
{
return
iequals
(
c
.
name
(),
name
);
});
if
(
i
!=
m_datablocks
.
end
())
return
*
i
;
m_datablocks
.
emplace_back
(
name
);
return
m_datablocks
.
back
();
}
const
datablock_type
&
operator
[](
std
::
string_view
name
)
const
{
static
const
datablock_type
s_empty
;
auto
i
=
std
::
find_if
(
m_datablocks
.
begin
(),
m_datablocks
.
end
(),
[
name
](
const
datablock_type
&
c
)
{
return
iequals
(
c
.
name
(),
name
);
});
return
i
==
m_datablocks
.
end
()
?
s_empty
:
*
i
;
}
std
::
tuple
<
iterator
,
bool
>
emplace
(
std
::
string_view
name
)
{
bool
is_new
=
true
;
auto
i
=
m_datablocks
.
begin
();
while
(
i
!=
m_datablocks
.
end
())
{
if
(
iequals
(
name
,
i
->
name
()))
{
is_new
=
false
;
if
(
i
!=
m_datablocks
.
begin
())
{
auto
n
=
std
::
next
(
i
);
m_datablocks
.
splice
(
m_datablocks
.
begin
(),
m_datablocks
,
i
,
n
);
}
break
;
}
++
i
;
}
if
(
is_new
)
m_datablocks
.
emplace
(
m_datablocks
.
begin
(),
name
);
return
std
::
make_tuple
(
m_datablocks
.
begin
(),
is_new
);
}
bool
empty
()
const
{
return
m_datablocks
.
empty
();
}
size_t
size
()
const
{
return
m_datablocks
.
size
();
}
reference
front
()
{
return
m_datablocks
.
front
();
}
reference
back
()
{
return
m_datablocks
.
back
();
}
void
load
(
std
::
istream
&
is
)
{
// auto saved = mValidator;
// setValidator(nullptr);
parser_type
p
(
is
,
*
this
);
p
.
parseFile
();
// if (saved != nullptr)
// {
// setValidator(saved);
// (void)isValid();
// }
}
private
:
datablock_list
m_datablocks
;
};
using
file
=
file_t
<>
;
...
...
include/cif++/v2/item.hpp
View file @
24fa80ba
...
...
@@ -66,7 +66,7 @@ class item
auto
r
=
cif
::
to_chars
(
m_buffer
,
m_buffer
+
sizeof
(
m_buffer
)
-
1
,
value
,
cif
::
chars_format
::
fixed
,
precision
);
if
(
r
.
ec
!=
std
::
errc
())
throw
std
::
runtime_error
(
"Could not format number"
);
assert
(
r
.
ptr
>=
m_buffer
and
r
.
ptr
<
m_buffer
+
sizeof
(
m_buffer
));
*
r
.
ptr
=
0
;
m_value
=
std
::
string_view
(
m_buffer
,
r
.
ptr
-
m_buffer
);
...
...
@@ -94,7 +94,7 @@ class item
auto
r
=
std
::
to_chars
(
m_buffer
,
m_buffer
+
sizeof
(
m_buffer
)
-
1
,
value
);
if
(
r
.
ec
!=
std
::
errc
())
throw
std
::
runtime_error
(
"Could not format number"
);
assert
(
r
.
ptr
>=
m_buffer
and
r
.
ptr
<
m_buffer
+
sizeof
(
m_buffer
));
*
r
.
ptr
=
0
;
m_value
=
std
::
string_view
(
m_buffer
,
r
.
ptr
-
m_buffer
);
...
...
@@ -134,44 +134,52 @@ class item
private
:
std
::
string_view
m_name
;
std
::
string_view
m_value
;
char
m_buffer
[
64
];
// TODO: optimize this magic number, might be too large
char
m_buffer
[
64
];
// TODO: optimize this magic number, might be too large
};
// --------------------------------------------------------------------
// Transient object to access stored data
template
<
typename
Row
>
template
<
typename
Row
Handle
>
struct
item_handle
{
using
row_
type
=
Row
;
using
row_
handle_type
=
RowHandle
;
public
:
// conversion helper class
template
<
typename
T
,
typename
=
void
>
struct
item_value_as
;
template
<
typename
T
,
std
::
enable_if_t
<
std
::
is_arithmetic_v
<
T
>
,
int
>
=
0
>
template
<
typename
T
>
item_handle
&
operator
=
(
const
T
&
value
)
{
this
->
operator
=
(
std
::
to_string
(
value
));
item
v
{
""
,
value
};
m_row_handle
.
assign
(
m_column
,
v
.
value
(),
false
);
return
*
this
;
}
template
<
typename
T
>
item_handle
&
operator
=
(
const
std
::
optional
<
T
>
&
value
)
{
if
(
value
)
this
->
operator
=
(
*
value
);
else
this
->
operator
=
(
"?"
);
return
*
this
;
}
// template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
// item_handle &operator=(const T &value)
// {
// this->operator=(std::to_string(value));
// return *this;
// }
item_handle
&
operator
=
(
const
std
::
string
&
value
)
{
m_row
.
assign
(
m_column
,
value
,
false
);
return
*
this
;
}
// template <typename T>
// item_handle &operator=(const std::optional<T> &value)
// {
// if (value)
// this->operator=(*value);
// else
// this->operator=("?");
// return *this;
// }
// item_handle &operator=(std::string_view value)
// {
// m_row_handle.assign(m_column, value, false);
// return *this;
// }
template
<
typename
...
Ts
>
void
os
(
const
Ts
&
...
v
)
...
...
@@ -227,7 +235,7 @@ struct item_handle
// const char *c_str() const
// {
// for (auto iv = m_row.m_head; iv != nullptr; iv = iv->m_next)
// for (auto iv = m_row
_handle
.m_head; iv != nullptr; iv = iv->m_next)
// {
// if (iv->m_column_ix == m_column)
// return iv->m_text;
...
...
@@ -238,7 +246,7 @@ struct item_handle
std
::
string_view
text
()
const
{
for
(
auto
iv
=
m_row
.
m_head
;
iv
!=
nullptr
;
iv
=
iv
->
m_next
)
for
(
auto
iv
=
m_row
_handle
.
m_row
->
m_head
;
iv
!=
nullptr
;
iv
=
iv
->
m_next
)
{
if
(
iv
->
m_column_ix
==
m_column
)
return
iv
->
text
();
...
...
@@ -250,15 +258,15 @@ struct item_handle
// bool operator!=(const std::string &s) const { return s != c_str(); }
// bool operator==(const std::string &s) const { return s == c_str(); }
item_handle
(
uint16_t
column
,
row_type
&
row
)
item_handle
(
uint16_t
column
,
row_
handle_
type
&
row
)
:
m_column
(
column
)
,
m_row
(
row
)
,
m_row
_handle
(
row
)
{
}
private
:
uint16_t
m_column
;
row_
type
&
m_row
;
row_
handle_type
&
m_row_handle
;
// bool mConst = false;
static
constexpr
const
char
*
s_empty_result
=
""
;
...
...
include/cif++/v2/iterator.hpp
View file @
24fa80ba
...
...
@@ -55,6 +55,8 @@ class iterator_impl
using
pointer
=
std
::
conditional_t
<
N
==
0
,
row_handle_type
,
value_type
*>
;
using
reference
=
std
::
conditional_t
<
N
==
0
,
row_handle_type
,
value_type
&>
;
iterator_impl
()
=
default
;
iterator_impl
(
const
iterator_impl
&
rhs
)
=
default
;
template
<
typename
C2
,
typename
...
T2s
>
...
...
@@ -188,8 +190,8 @@ class iterator_impl
return
{};
}
category_type
*
m_category
;
row_type
*
m_current
;
category_type
*
m_category
=
nullptr
;
row_type
*
m_current
=
nullptr
;
value_type
m_value
;
std
::
array
<
size_t
,
N
>
m_column_ix
;
};
...
...
include/cif++/v2/parser.hpp
0 → 100644
View file @
24fa80ba
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <map>
#include <stack>
namespace
cif
::
v2
{
// --------------------------------------------------------------------
// Exception type for syntax errors; what() carries the line number and the
// caller-supplied message in the form "parse error at line N: message".
class parse_error : public std::runtime_error
{
  public:
    // line_nr: line number to report; message: description of the problem.
    parse_error(uint32_t line_nr, const std::string &message)
        : std::runtime_error(compose(line_nr, message))
    {
    }

  private:
    // Builds the text handed to std::runtime_error.
    static std::string compose(uint32_t line_nr, const std::string &message)
    {
        std::string text = "parse error at line ";
        text += std::to_string(line_nr);
        text += ": ";
        text += message;
        return text;
    }
};
// --------------------------------------------------------------------
class
sac_parser
{
public
:
// Maps a datablock name to the stream position stored for it by
// indexDatablocks().
using DatablockIndex = std::map<std::string, std::size_t>;

// Binds the parser to the input stream `is`, resets the line bookkeeping and
// reads the first token so that m_lookahead is valid on return.
// Note that this already consumes input from `is`.
sac_parser(std::istream &is)
    : mData(is)
{
    m_bol = true;
    m_line_nr = 1;
    m_validate = true;

    // if (init)
    m_lookahead = get_next_token();
}

// Virtual so the parser can be destroyed through a base pointer.
virtual ~sac_parser() = default;
// Bit flags tested against the entries of kCharTraitsTable by the
// is_ordinary / is_non_blank / is_text_lead / is_any_print predicates.
enum CharTraitsMask : uint8_t
{
    kOrdinaryMask = 0x01,
    kNonBlankMask = 0x02,
    kTextLeadMask = 0x04,
    kAnyPrintMask = 0x08
};
// Returns true when `ch` separates tokens: one of the six whitespace
// characters std::isspace accepts in the default "C" locale, or '#', which
// starts a comment.
//
// The test is spelled out instead of calling std::isspace because that
// function is not constexpr (so this constexpr function could never be
// evaluated at compile time) and has undefined behaviour for negative
// arguments other than EOF. Any int, including EOF, is a valid argument here.
static constexpr bool is_white(int ch)
{
    return ch == ' ' or ch == '\t' or ch == '\n' or ch == '\v' or ch == '\f' or ch == '\r' or ch == '#';
}
// True when `ch` lies in 0x20..0x7f and its kCharTraitsTable entry has the
// kOrdinaryMask bit set. Values outside that range are never ordinary.
static constexpr bool is_ordinary(int ch)
{
    if (ch < 0x20 or ch > 0x7f)
        return false;

    return (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
}
// True when `ch` lies in 0x21..0x7f and its kCharTraitsTable entry has the
// kNonBlankMask bit set. The space character (0x20) is rejected by the range
// test itself, before the table is consulted.
static constexpr bool is_non_blank(int ch)
{
    if (ch <= 0x20 or ch > 0x7f)
        return false;

    return (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
}
// True when `ch` lies in 0x20..0x7f and its kCharTraitsTable entry has the
// kTextLeadMask bit set. get_next_token() uses this for the first character
// of a line inside a text field.
static constexpr bool is_text_lead(int ch)
{
    if (ch < 0x20 or ch > 0x7f)
        return false;

    return (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
}
// True for a horizontal tab, or for a character in 0x20..0x7f whose
// kCharTraitsTable entry has the kAnyPrintMask bit set.
static constexpr bool is_any_print(int ch)
{
    if (ch == '\t')
        return true;

    if (ch < 0x20 or ch > 0x7f)
        return false;

    return (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0;
}
// Returns true when the NUL-terminated string `s` can be written without
// quotes: its first character must satisfy is_ordinary(), every following
// character must satisfy is_non_blank(), and it must not look like a
// reserved word. An empty string yields false.
//
// The reserved-word test rejects anything starting with "data_" or "save_",
// and anything containing "loop_", "stop_" or "global_" (case-insensitive).
// The pattern ends in ".*" rather than ".+" so that the bare keywords are
// rejected too: get_next_token() turns "loop_", "stop_" and "global_" into
// keyword tokens as soon as it sees the underscore, and "data_" / "save_"
// into DATA / SAVE tokens, so none of them may be emitted as a plain value.
static bool is_unquoted_string(const char *s)
{
    auto ss = s;

    bool result = is_ordinary(*s++);
    while (result and *s != 0)
    {
        result = is_non_blank(*s);
        ++s;
    }

    // but be careful it does not contain e.g. stop_
    if (result)
    {
        static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.*)", std::regex_constants::icase);
        result = not std::regex_match(ss, reservedRx);
    }

    return result;
}
protected
:
// Per-character trait bits for 0x20..0x7f; the predicates index it with
// (ch - 0x20). Each entry is a combination of the CharTraitsMask flags:
//   15 = ordinary | non-blank | text-lead | any-print
//   14 =            non-blank | text-lead | any-print
//   10 =            non-blank |             any-print   (only ';')
//    0 = no flag set                                    (only 0x7f)
// Only the first 96 of the 128 entries are listed; the remainder is
// zero-initialised.
static constexpr uint8_t kCharTraitsTable[128] = {
    //  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f
    14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 2
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 3
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 4
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, // 5
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 6
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0,  // 7
};
// Kinds of token produced by get_next_token().
enum class CIFToken
{
    Unknown,
    Eof,
    DATA,
    LOOP,
    GLOBAL,
    SAVE,
    STOP,
    Tag,
    Value
};

// Returns a printable name for `token`, used in diagnostics.
// A value that is not one of the enumerators yields "Invalid token" instead
// of running off the end of the function, which would be undefined behaviour.
static constexpr const char *get_token_name(CIFToken token)
{
    switch (token)
    {
        case CIFToken::Unknown: return "Unknown";
        case CIFToken::Eof: return "Eof";
        case CIFToken::DATA: return "DATA";
        case CIFToken::LOOP: return "LOOP";
        case CIFToken::GLOBAL: return "GLOBAL";
        case CIFToken::SAVE: return "SAVE";
        case CIFToken::STOP: return "STOP";
        case CIFToken::Tag: return "Tag";
        case CIFToken::Value: return "Value";
    }

    // Reached only for a value outside the enumerator list.
    return "Invalid token";
}
// Classification that get_next_token() stores in mTokenType for a token.
enum class CIFValue
{
    Int,
    Float,
    Numeric,
    String,
    TextField,
    Inapplicable,
    Unknown
};

// Returns a printable name for `type`, used in diagnostics.
// A value that is not one of the enumerators yields "Invalid value type"
// instead of running off the end of the function, which would be undefined
// behaviour.
static constexpr const char *get_value_name(CIFValue type)
{
    switch (type)
    {
        case CIFValue::Int: return "Int";
        case CIFValue::Float: return "Float";
        case CIFValue::Numeric: return "Numeric";
        case CIFValue::String: return "String";
        case CIFValue::TextField: return "TextField";
        case CIFValue::Inapplicable: return "Inapplicable";
        case CIFValue::Unknown: return "Unknown";
    }

    // Reached only for a value outside the enumerator list.
    return "Invalid value type";
}
// Delivers the next input character: the most recently pushed-back one when
// mBuffer is not empty, otherwise a fresh one from the stream. A carriage
// return, with or without a following line feed, is reported as one '\n'.
// The returned character is also appended to mTokenValue, and m_line_nr is
// advanced for every '\n' handed out.
int getNextChar()
{
    int ch;

    if (not mBuffer.empty())
    {
        ch = mBuffer.top();
        mBuffer.pop();
    }
    else
        ch = mData.get();

    // Fold CR and CR/LF into LF. A character following the CR that is not an
    // LF is kept in mBuffer for the next call.
    if (ch == '\r')
    {
        const int next = mData.get();
        if (next != '\n')
            mBuffer.push(next);

        ch = '\n';
    }

    mTokenValue += static_cast<char>(ch);

    if (ch == '\n')
        ++m_line_nr;

    if (VERBOSE >= 6)
    {
        std::cerr << "getNextChar => ";

        // printable characters are shown as such, everything else as a number
        if (not iscntrl(ch) and isprint(ch))
            std::cerr << static_cast<char>(ch) << std::endl;
        else
            std::cerr << static_cast<int>(ch) << std::endl;
    }

    return ch;
}
// Undoes the last getNextChar(): the final character of mTokenValue is
// removed and pushed onto mBuffer so it will be delivered again. The line
// counter is decremented when that character is a newline.
// Precondition (asserted): mTokenValue is not empty.
void retract()
{
    assert(not mTokenValue.empty());

    const char last = mTokenValue.back();
    mTokenValue.pop_back();

    if (last == '\n')
        --m_line_nr;

    mBuffer.push(last);
}
int
restart
(
int
start
)
{
int
result
=
0
;
while
(
not
mTokenValue
.
empty
())
retract
();
switch
(
start
)
{
case
State
:
:
Start
:
result
=
State
::
Float
;
break
;
case
State
:
:
Float
:
result
=
State
::
Int
;
break
;
case
State
:
:
Int
:
result
=
State
::
Value
;
break
;
default:
error
(
"Invalid state in SacParser"
);
}
m_bol
=
false
;
return
result
;
}
// Scans the input and returns the next token. The token text is left in
// mTokenValue and, for values, its classification in mTokenType.
//
// The scanner is a state machine driven one character at a time by
// getNextChar(). For input that is not whitespace, a comment, a tag, a text
// field or a quoted string, the recognisers are tried in the order
// Float, Int, Value; restart() pushes the characters back and selects the
// next one whenever a recogniser fails.
//
// Changes compared to the previous version:
//  - the numeric states now stay in place while further digits arrive.
//    Before, the second digit of a number caused a restart, so only
//    one-digit numbers were ever classified as Int or Float.
//  - the text field assertion now checks for the three characters the
//    following substr() strips (leading ';', trailing newline and ';').
CIFToken get_next_token()
{
    const auto kEOF = std::char_traits<char>::eof();

    CIFToken result = CIFToken::Unknown;
    int quoteChar = 0;
    int state = State::Start, start = State::Start;
    m_bol = false;

    mTokenValue.clear();
    mTokenType = CIFValue::Unknown;

    while (result == CIFToken::Unknown)
    {
        auto ch = getNextChar();

        switch (state)
        {
            case State::Start:
                if (ch == kEOF)
                    result = CIFToken::Eof;
                else if (ch == '\n')
                {
                    m_bol = true;
                    state = State::White;
                }
                else if (ch == ' ' or ch == '\t')
                    state = State::White;
                else if (ch == '#')
                    state = State::Comment;
                else if (ch == '_')
                    state = State::Tag;
                else if (ch == ';' and m_bol)
                    state = State::TextField;
                else if (ch == '\'' or ch == '"')
                {
                    quoteChar = ch;
                    state = State::QuotedString;
                }
                else
                    state = start = restart(start);
                break;

            // skipping whitespace, tracking whether we are at the start of a line
            case State::White:
                if (ch == kEOF)
                    result = CIFToken::Eof;
                else if (not isspace(ch))
                {
                    state = State::Start;
                    retract();
                    mTokenValue.clear();
                }
                else
                    m_bol = (ch == '\n');
                break;

            // inside a comment, which runs until the end of the line
            case State::Comment:
                if (ch == '\n')
                {
                    state = State::Start;
                    m_bol = true;
                    mTokenValue.clear();
                }
                else if (ch == kEOF)
                    result = CIFToken::Eof;
                else if (not is_any_print(ch))
                    error("invalid character in comment");
                break;

            // inside a line of a text field
            case State::TextField:
                if (ch == '\n')
                    state = State::TextField + 1;
                else if (ch == kEOF)
                    error("unterminated textfield");
                else if (not is_any_print(ch))
                    warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string(static_cast<int>(ch)) + ")");
                break;

            // first character of a line inside a text field
            case State::TextField + 1:
                if (is_text_lead(ch) or ch == ' ' or ch == '\t')
                    state = State::TextField;
                else if (ch == ';')
                {
                    // the token holds at least the opening ';', a newline and the closing ';'
                    assert(mTokenValue.length() >= 3);
                    mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 3);
                    mTokenType = CIFValue::TextField;
                    result = CIFToken::Value;
                }
                else if (ch == kEOF)
                    error("unterminated textfield");
                else if (ch != '\n')
                    error("invalid character in text field");
                break;

            case State::QuotedString:
                if (ch == kEOF)
                    error("unterminated quoted string");
                else if (ch == quoteChar)
                    state = State::QuotedStringQuote;
                else if (not is_any_print(ch))
                    warning("invalid character in quoted string: '" + std::string({static_cast<char>(ch)}) + '\'');
                break;

            // a quote character was seen inside a quoted string; it only
            // closes the string when followed by whitespace
            case State::QuotedStringQuote:
                if (is_white(ch))
                {
                    retract();
                    result = CIFToken::Value;
                    mTokenType = CIFValue::String;

                    if (mTokenValue.length() < 2)
                        error("Invalid quoted string token");

                    // strip the surrounding quotes
                    mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 2);
                }
                else if (ch == quoteChar)
                    ;
                else if (is_any_print(ch))
                    state = State::QuotedString;
                else if (ch == kEOF)
                    error("unterminated quoted string");
                else
                    error("invalid character in quoted string");
                break;

            case State::Tag:
                if (not is_non_blank(ch))
                {
                    retract();
                    result = CIFToken::Tag;
                }
                break;

            case State::Float:
                if (ch == '+' or ch == '-')
                {
                    state = State::Float + 1;
                }
                else if (isdigit(ch))
                    state = State::Float + 1;
                else
                    state = start = restart(start);
                break;

            // parsed a sign or digits
            case State::Float + 1:
                // if (ch == '(') // numeric???
                //     mState = State::NumericSuffix;
                // else
                if (ch == '.')
                    state = State::Float + 2;
                else if (tolower(ch) == 'e')
                    state = State::Float + 3;
                else if (is_white(ch) or ch == kEOF)
                {
                    retract();
                    result = CIFToken::Value;
                    mTokenType = CIFValue::Int;
                }
                else if (not isdigit(ch)) // further digits keep us in this state
                    state = start = restart(start);
                break;

            // parsed '.'
            case State::Float + 2:
                if (tolower(ch) == 'e')
                    state = State::Float + 3;
                else if (is_white(ch) or ch == kEOF)
                {
                    retract();
                    result = CIFToken::Value;
                    mTokenType = CIFValue::Float;
                }
                else if (not isdigit(ch)) // digits of the fraction keep us in this state
                    state = start = restart(start);
                break;

            // parsed 'e'
            case State::Float + 3:
                if (ch == '-' or ch == '+')
                    state = State::Float + 4;
                else if (isdigit(ch))
                    state = State::Float + 5;
                else
                    state = start = restart(start);
                break;

            // parsed the sign of the exponent
            case State::Float + 4:
                if (isdigit(ch))
                    state = State::Float + 5;
                else
                    state = start = restart(start);
                break;

            // parsed at least one digit of the exponent
            case State::Float + 5:
                if (is_white(ch) or ch == kEOF)
                {
                    retract();
                    result = CIFToken::Value;
                    mTokenType = CIFValue::Float;
                }
                else if (not isdigit(ch)) // further exponent digits keep us in this state
                    state = start = restart(start);
                break;

            case State::Int:
                if (isdigit(ch) or ch == '+' or ch == '-')
                    state = State::Int + 1;
                else
                    state = start = restart(start);
                break;

            case State::Int + 1:
                if (is_white(ch) or ch == kEOF)
                {
                    retract();
                    result = CIFToken::Value;
                    mTokenType = CIFValue::Int;
                }
                else if (not isdigit(ch)) // further digits keep us in this state
                    state = start = restart(start);
                break;

            case State::Value:
                // an underscore may complete one of the reserved words
                if (ch == '_')
                {
                    std::string s = toLowerCopy(mTokenValue);

                    if (s == "global_")
                        result = CIFToken::GLOBAL;
                    else if (s == "stop_")
                        result = CIFToken::STOP;
                    else if (s == "loop_")
                        result = CIFToken::LOOP;
                    else if (s == "data_")
                    {
                        state = State::DATA;
                        continue;
                    }
                    else if (s == "save_")
                    {
                        state = State::SAVE;
                        continue;
                    }
                }

                if (result == CIFToken::Unknown and not is_non_blank(ch))
                {
                    retract();
                    result = CIFToken::Value;

                    if (mTokenValue == ".")
                        mTokenType = CIFValue::Inapplicable;
                    else if (mTokenValue == "?")
                    {
                        mTokenType = CIFValue::Unknown;
                        mTokenValue.clear();
                    }
                }
                break;

            // collecting the name that follows data_ or save_
            case State::DATA:
            case State::SAVE:
                if (not is_non_blank(ch))
                {
                    retract();

                    if (state == State::DATA)
                        result = CIFToken::DATA;
                    else
                        result = CIFToken::SAVE;

                    // drop the five-character "data_" / "save_" prefix
                    mTokenValue.erase(mTokenValue.begin(), mTokenValue.begin() + 5);
                }
                break;

            default:
                assert(false);
                error("Invalid state in get_next_token");
                break;
        }
    }

    if (VERBOSE >= 5)
    {
        std::cerr << get_token_name(result);
        if (mTokenType != CIFValue::Unknown)
            std::cerr << ' ' << get_value_name(mTokenType);
        if (result != CIFToken::Eof)
            std::cerr << " " << std::quoted(mTokenValue);
        std::cerr << std::endl;
    }

    return result;
}
// Checks that the lookahead token is `token` and then advances to the next
// token. A mismatch is reported through error(); the message names both the
// expected and the actual token.
void match(CIFToken token)
{
    if (m_lookahead != token)
    {
        std::string message("Unexpected token, expected ");
        message += get_token_name(token);
        message += " but found ";
        message += get_token_name(m_lookahead);

        error(message);
    }

    m_lookahead = get_next_token();
}
public
:
// Scans the raw stream buffer for the header "data_<datablock>" followed by
// whitespace and, when found, parses that datablock. Comments, quoted
// strings and text fields are skipped so that a header-like sequence inside
// them is not mistaken for a datablock. Returns true when the datablock was
// found.
//
// Changes compared to the previous version:
//  - the scan stops reading as soon as the header is found. The old
//    for-loop ran its increment once more after `found` was set and thereby
//    swallowed the first character following the header's whitespace.
//  - dblk is never indexed beyond its length (a NUL byte in the input
//    directly after the header text used to advance the index past the end).
//  - the "data_" keyword is matched case-insensitively, in line with the
//    'D' accepted in the start state and with get_next_token(). The
//    datablock name itself is still compared exactly.
//
// NOTE(review): the constructor already reads one token from the stream, so
// part of the input has been consumed before this scan starts - confirm
// that this is intended.
bool parseSingleDatablock(const std::string &datablock)
{
    // first locate the start, as fast as we can
    auto &sb = *mData.rdbuf();

    enum
    {
        start,
        comment,
        string,
        string_quote,
        qstring,
        data
    } state = start;

    const auto kEOF = std::streambuf::traits_type::eof();
    const std::string dblk = "data_" + datablock;
    const std::string::size_type kKeywordLength = 5; // length of "data_"

    int quote = 0;
    bool bol = true;
    std::string::size_type si = 0;
    bool found = false;

    while (not found)
    {
        const auto ch = sb.sbumpc();
        if (ch == kEOF)
            break;

        switch (state)
        {
            case start:
                switch (ch)
                {
                    case '#': state = comment; break;
                    case 'd':
                    case 'D':
                        state = data;
                        si = 1;
                        break;
                    case '\'':
                    case '"':
                        state = string;
                        quote = ch;
                        break;
                    case ';':
                        if (bol)
                            state = qstring;
                        break;
                }
                break;

            case comment:
                if (ch == '\n')
                    state = start;
                break;

            case string:
                if (ch == quote)
                    state = string_quote;
                break;

            // a quote only ends the string when followed by whitespace
            case string_quote:
                if (std::isspace(ch))
                    state = start;
                else
                    state = string;
                break;

            // inside a text field, which ends at a ';' at the start of a line
            case qstring:
                if (ch == ';' and bol)
                    state = start;
                break;

            case data:
                if (si == dblk.length())
                {
                    // the complete header matched, it must end in whitespace
                    if (isspace(ch))
                        found = true;
                    else
                        state = start;
                }
                else if ((si < kKeywordLength ? std::tolower(ch) : ch) == dblk[si])
                    ++si;
                else
                    state = start;
                break;
        }

        bol = (ch == '\n');
    }

    if (found)
    {
        produceDatablock(datablock);
        m_lookahead = get_next_token();
        parseDataBlock();
    }

    return found;
}
// Scans the raw stream buffer until end of input and returns, for every
// "data_<name>" header that is followed by whitespace, the stream position
// reported by mData.tellg() right after that whitespace character.
// Comments, quoted strings and text fields are skipped.
//
// Changes compared to the previous version:
//  - dblk is never indexed beyond its terminator (a NUL byte in the input
//    directly after "data_" used to advance the index past the array).
//  - the "data_" keyword is matched case-insensitively, in line with the
//    'D' accepted in the start state and with get_next_token().
//
// A header at the very end of the input, without trailing whitespace, is not
// added to the index.
DatablockIndex indexDatablocks()
{
    DatablockIndex index;

    // first locate the start, as fast as we can
    auto &sb = *mData.rdbuf();

    enum
    {
        start,
        comment,
        string,
        string_quote,
        qstring,
        data,
        data_name
    } state = start;

    const auto kEOF = std::streambuf::traits_type::eof();
    const char dblk[] = "data_";
    const std::string::size_type kKeywordLength = sizeof(dblk) - 1;

    int quote = 0;
    bool bol = true;
    std::string::size_type si = 0;
    std::string datablock;

    for (auto ch = sb.sbumpc(); ch != kEOF; ch = sb.sbumpc())
    {
        switch (state)
        {
            case start:
                switch (ch)
                {
                    case '#': state = comment; break;
                    case 'd':
                    case 'D':
                        state = data;
                        si = 1;
                        break;
                    case '\'':
                    case '"':
                        state = string;
                        quote = ch;
                        break;
                    case ';':
                        if (bol)
                            state = qstring;
                        break;
                }
                break;

            case comment:
                if (ch == '\n')
                    state = start;
                break;

            case string:
                if (ch == quote)
                    state = string_quote;
                break;

            // a quote only ends the string when followed by whitespace
            case string_quote:
                if (std::isspace(ch))
                    state = start;
                else
                    state = string;
                break;

            // inside a text field, which ends at a ';' at the start of a line
            case qstring:
                if (ch == ';' and bol)
                    state = start;
                break;

            case data:
                if (si == kKeywordLength)
                {
                    // "data_" matched, the name has to start right here
                    if (is_non_blank(ch))
                    {
                        datablock = {static_cast<char>(ch)};
                        state = data_name;
                    }
                    else
                        state = start;
                }
                else if (std::tolower(ch) == dblk[si])
                    ++si;
                else
                    state = start;
                break;

            case data_name:
                if (is_non_blank(ch))
                    datablock.push_back(static_cast<char>(ch));
                else if (isspace(ch))
                {
                    if (not datablock.empty())
                        index[datablock] = mData.tellg();

                    state = start;
                }
                else
                    state = start;
                break;
        }

        bol = (ch == '\n');
    }

    return index;
}
// Parse a single datablock using a previously built index: seek to the
// recorded position for `datablock` and parse from there.
// Returns false when the name is not present in `index`.
bool parseSingleDatablock(const std::string &datablock, const DatablockIndex &index)
{
    const auto entry = index.find(datablock);
    if (entry == index.end())
        return false;

    mData.seekg(entry->second);

    produceDatablock(datablock);
    m_lookahead = get_next_token();
    parseDataBlock();

    return true;
}
void
parseFile
()
{
while
(
m_lookahead
!=
CIFToken
::
Eof
)
{
switch
(
m_lookahead
)
{
case
CIFToken
:
:
GLOBAL
:
parseGlobal
();
break
;
case
CIFToken
:
:
DATA
:
produceDatablock
(
mTokenValue
);
match
(
CIFToken
::
DATA
);
parseDataBlock
();
break
;
default
:
error
(
"This file does not seem to be an mmCIF file"
);
break
;
}
}
}
protected
:
// Consume a global_ section: the keyword followed by any number of
// tag/value pairs. The pairs are matched but not passed to a produce method.
void parseGlobal()
{
    match(CIFToken::GLOBAL);

    for (;;)
    {
        if (m_lookahead != CIFToken::Tag)
            break;

        match(CIFToken::Tag);
        match(CIFToken::Value);
    }
}
void
parseDataBlock
()
{
std
::
string
cat
;
while
(
m_lookahead
==
CIFToken
::
LOOP
or
m_lookahead
==
CIFToken
::
Tag
or
m_lookahead
==
CIFToken
::
SAVE
)
{
switch
(
m_lookahead
)
{
case
CIFToken
:
:
LOOP
:
{
cat
.
clear
();
// should start a new category
match
(
CIFToken
::
LOOP
);
std
::
vector
<
std
::
string
>
tags
;
while
(
m_lookahead
==
CIFToken
::
Tag
)
{
std
::
string
catName
,
itemName
;
std
::
tie
(
catName
,
itemName
)
=
splitTagName
(
mTokenValue
);
if
(
cat
.
empty
())
{
produceCategory
(
catName
);
cat
=
catName
;
}
else
if
(
not
iequals
(
cat
,
catName
))
error
(
"inconsistent categories in loop_"
);
tags
.
push_back
(
itemName
);
match
(
CIFToken
::
Tag
);
}
while
(
m_lookahead
==
CIFToken
::
Value
)
{
produceRow
();
for
(
auto
tag
:
tags
)
{
produceItem
(
cat
,
tag
,
mTokenValue
);
match
(
CIFToken
::
Value
);
}
}
cat
.
clear
();
break
;
}
case
CIFToken
:
:
Tag
:
{
std
::
string
catName
,
itemName
;
std
::
tie
(
catName
,
itemName
)
=
splitTagName
(
mTokenValue
);
if
(
not
iequals
(
cat
,
catName
))
{
produceCategory
(
catName
);
cat
=
catName
;
produceRow
();
}
match
(
CIFToken
::
Tag
);
produceItem
(
cat
,
itemName
,
mTokenValue
);
match
(
CIFToken
::
Value
);
break
;
}
case
CIFToken
:
:
SAVE
:
parseSaveFrame
();
break
;
default
:
assert
(
false
);
break
;
}
}
}
virtual
void
parseSaveFrame
()
{
error
(
"A regular CIF file should not contain a save frame"
);
}
// Abort parsing by throwing a parse_error carrying the current line number
// and the message `msg`.
void error(const std::string &msg)
{
    throw parse_error(m_line_nr, msg);
}
// Write a non-fatal diagnostic to std::cerr, prefixed with the current line
// number. Parsing continues afterwards.
void warning(const std::string &msg)
{
    // note the space after "line", it was missing and gave e.g. "line12"
    std::cerr << "parser warning at line " << m_line_nr << ": " << msg << std::endl;
}
// production methods, these are pure virtual here

// Called when a datablock named `name` starts.
virtual void produceDatablock(const std::string &name) = 0;

// Called when a category named `name` starts in the current datablock.
virtual void produceCategory(const std::string &name) = 0;

// Called before the items of a new row in the current category.
virtual void produceRow() = 0;

// Called for each value: `item` in `category` has the text `value`.
virtual void produceItem(const std::string &category, const std::string &item, const std::string &value) = 0;
protected
:
enum
State
{
Start
,
White
,
Comment
,
QuestionMark
,
Dot
,
QuotedString
,
QuotedStringQuote
,
UnquotedString
,
Tag
,
TextField
,
Float
=
100
,
Int
=
110
,
Value
=
300
,
DATA
,
SAVE
};
std
::
istream
&
mData
;
// Parser state
bool
m_validate
;
uint32_t
m_line_nr
;
bool
m_bol
;
CIFToken
m_lookahead
;
std
::
string
mTokenValue
;
CIFValue
mTokenType
;
std
::
stack
<
int
>
mBuffer
;
};
// --------------------------------------------------------------------
template
<
typename
File
,
typename
Datablock
,
typename
Category
>
class
parser_t
:
public
sac_parser
{
public
:
using
file_type
=
File
;
using
datablock_type
=
Datablock
;
using
category_type
=
Category
;
using
row_handle_type
=
category_type
::
reference
;
parser_t
(
std
::
istream
&
is
,
file_type
&
file
)
:
sac_parser
(
is
)
,
m_file
(
file
)
{
}
void
produceDatablock
(
const
std
::
string
&
name
)
override
{
std
::
tie
(
m_datablock
,
std
::
ignore
)
=
m_file
.
emplace
(
name
);
}
void
produceCategory
(
const
std
::
string
&
name
)
override
{
if
(
VERBOSE
>=
4
)
std
::
cerr
<<
"producing category "
<<
name
<<
std
::
endl
;
std
::
tie
(
m_category
,
std
::
ignore
)
=
m_datablock
->
emplace
(
name
);
}
void
produceRow
()
override
{
if
(
VERBOSE
>=
4
)
std
::
cerr
<<
"producing row for category "
<<
m_category
->
name
()
<<
std
::
endl
;
m_category
->
emplace
({});
m_row
=
m_category
->
back
();
// m_row.lineNr(m_line_nr);
}
void
produceItem
(
const
std
::
string
&
category
,
const
std
::
string
&
item
,
const
std
::
string
&
value
)
override
{
if
(
VERBOSE
>=
4
)
std
::
cerr
<<
"producing _"
<<
category
<<
'.'
<<
item
<<
" -> "
<<
value
<<
std
::
endl
;
if
(
not
iequals
(
category
,
m_category
->
name
()))
error
(
"inconsistent categories in loop_"
);
m_row
[
item
]
=
mTokenValue
;
}
protected
:
file_type
&
m_file
;
file_type
::
iterator
m_datablock
;
datablock_type
::
iterator
m_category
;
row_handle_type
m_row
;
};
// class Parser : public SacParser
// {
// public:
// Parser(std::istream &is, File &f, bool init = true);
// virtual void produceDatablock(const std::string &name);
// virtual void produceCategory(const std::string &name);
// virtual void produceRow();
// virtual void produceItem(const std::string &category, const std::string &item, const std::string &value);
// protected:
// File &mFile;
// Datablock *mDataBlock;
// Datablock::iterator m_category;
// Row mRow;
// };
// // --------------------------------------------------------------------
// class DictParser : public Parser
// {
// public:
// DictParser(Validator &validator, std::istream &is);
// ~DictParser();
// void loadDictionary();
// private:
// virtual void parseSaveFrame();
// bool collectItemTypes();
// void linkItems();
// Validator &mValidator;
// File mFile;
// struct DictParserDataImpl *mImpl;
// bool mCollectedItemTypes = false;
// };
}
// namespace cif::v2
include/cif++/v2/row.hpp
View file @
24fa80ba
...
...
@@ -120,9 +120,14 @@ class row_handle
using
category_type
=
Category
;
using
row_type
=
std
::
conditional_t
<
std
::
is_const_v
<
category_type
>
,
const
typename
category_type
::
row
,
typename
category_type
::
row
>
;
using
item_handle_type
=
item_handle
<
row_handle
>
;
template
<
typename
>
friend
class
row_handle
;
template
<
typename
>
friend
class
item_handle
;
row_handle
()
=
default
;
row_handle
(
const
row_handle
&
)
=
default
;
...
...
@@ -149,24 +154,24 @@ class row_handle
return
m_cat
!=
nullptr
and
m_row
!=
nullptr
;
}
item_handle
<
row_type
>
operator
[](
uint32_t
column_ix
)
item_handle
_type
operator
[](
uint32_t
column_ix
)
{
return
item_handle
<
row_type
>
(
column_ix
,
*
m_row
);
return
item_handle
_type
(
column_ix
,
*
this
);
}
const
item_handle
<
const
row_type
>
operator
[](
uint32_t
column_ix
)
const
const
item_handle
_type
operator
[](
uint32_t
column_ix
)
const
{
return
item_handle
<
const
row_type
>
(
column_ix
,
*
m_row
);
return
item_handle
_type
(
column_ix
,
const_cast
<
row_handle
&>
(
*
this
)
);
}
item_handle
<
row_type
>
operator
[](
std
::
string_view
column_name
)
item_handle
_type
operator
[](
std
::
string_view
column_name
)
{
return
item_handle
<
row_type
>
(
get_column_ix
(
column_name
),
*
m_row
);
return
item_handle
_type
(
add_column
(
column_name
),
*
this
);
}
const
item_handle
<
const
row_type
>
operator
[](
std
::
string_view
column_name
)
const
const
item_handle
_type
operator
[](
std
::
string_view
column_name
)
const
{
return
item_handle
<
const
row_type
>
(
get_column_ix
(
column_name
),
*
m_row
);
return
item_handle
_type
(
get_column_ix
(
column_name
),
*
this
);
}
template
<
typename
...
Ts
,
size_t
N
>
...
...
@@ -186,12 +191,85 @@ class row_handle
return
detail
::
get_row_result
<
category_type
,
C
...
>
(
*
this
,
{
get_column_ix
(
columns
)...});
}
// Assign every item in `values` to this row. Each item is assigned with
// updateLinked set to true.
void assign(const std::vector<item> &values)
{
    // std::map<std::string, std::tuple<size_t, std::string, std::string>> changed;

    for (const auto &entry : values)
    {
        assign(entry, true);

        // auto columnIx = cat->add_column(value.name());
        // auto &col = cat->m_columns[columnIx];
        // std::string tag = col.mValidator ? col.mValidator->mTag : std::to_string(columnIx);

        // changed[tag] = std::make_tuple(columnIx, operator[](columnIx).c_str(), value.value());

        // assign(columnIx, value.value(), true);
    }

    // // see if we need to update any child categories that depend on these values
    // // auto iv = col.mValidator;
    // if (mCascade)
    // {
    //     for (auto &&[childCat, linked] : cat->mChildLinks)
    //     {
    //         Condition cond;
    //         std::string childTag;

    //         std::vector<Item> newValues;

    //         for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
    //         {
    //             std::string pk = linked->mParentKeys[ix];
    //             std::string ck = linked->mChildKeys[ix];

    //             if (changed.count(pk) > 0)
    //             {
    //                 childTag = ck;
    //                 cond = std::move(cond) && (Key(ck) == std::get<1>(changed[pk]));
    //                 newValues.emplace_back(ck, std::get<2>(changed[pk]));
    //             }
    //             else
    //             {
    //                 const char *value = (*this)[pk].c_str();
    //                 cond = std::move(cond) && (Key(ck) == value);
    //             }
    //         }

    //         auto rows = childCat->find(std::move(cond));
    //         for (auto &cr : rows)
    //             cr.assign(newValues);
    //     }
    // }
}
// Assign `value` to the column called `name`; the column index is obtained
// from the category's add_column(name). The remaining arguments are
// forwarded to the index based overload.
void assign(std::string_view name, std::string_view value, bool updateLinked, bool validate = true)
{
    const auto column = m_cat->add_column(name);
    assign(column, value, updateLinked, validate);
}
// Assign `value` to the column with index `column` by delegating to the
// category's update_value for this row.
void assign(size_t column, std::string_view value, bool updateLinked, bool validate = true)
{
    m_cat->update_value(m_row, column, value, updateLinked, validate);
}
private
:
uint
32
_t
get_column_ix
(
std
::
string_view
name
)
const
uint
16
_t
get_column_ix
(
std
::
string_view
name
)
const
{
return
m_cat
->
get_column_ix
(
name
);
}
// Forward to the category's add_column and return the column index it gives
// for `name`.
uint16_t add_column(std::string_view name)
{
    return m_cat->add_column(name);
}
// Assign a single item, using its name() as column name and its value() as
// the new value.
void assign(const item &i, bool updateLinked)
{
    assign(i.name(), i.value(), updateLinked);
}
category_type
*
m_cat
=
nullptr
;
row_type
*
m_row
=
nullptr
;
};
...
...
src/parser.cpp
0 → 100644
View file @
24fa80ba
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <set>
#include <cif++/v2/parser.hpp>
// extern int VERBOSE;
namespace
cif
::
v2
{
// NOTE(review): presumably the maximum line length allowed in a CIF file,
// confirm against the users of this constant.
const uint32_t kMaxLineLength = 132;

// Per character flags. The row comments give the high nibble of the
// character code, so the first entry belongs to 0x20 (space) and the 96th to
// 0x7f; the remaining entries of the 128 are zero initialised.
// NOTE(review): the meaning of the individual bits is defined by the lookup
// helpers elsewhere, not visible here.
const uint8_t kCharTraitsTable[128] = {
    //  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f
    14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, //  2
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, //  3
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //  4
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, //  5
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //  6
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0,  //  7
};
// --------------------------------------------------------------------
// Construct the exception with the text
// "parse error at line <lineNr>: <message>".
parse_error::parse_error(uint32_t lineNr, const std::string &message)
    : std::runtime_error(std::string("parse error at line ")
                             .append(std::to_string(lineNr))
                             .append(": ")
                             .append(message))
{
}
// --------------------------------------------------------------------
// Printable names for tokens; indexed with a CIFToken value by match() and
// by the verbose output of getNextToken().
const char *SacParser::kTokenName[] = {
    "unknown",
    "EOF",
    "DATA",
    "LOOP",
    "GLOBAL",
    "SAVE",
    "STOP",
    "Tag",
    "Value"
};

// Printable names for value types; indexed with mTokenType in the verbose
// output of getNextToken().
const char *SacParser::kValueName[] = {
    "Int",
    "Float",
    "Numeric",
    "String",
    "TextField",
    "Inapplicable",
    "Unknown"
};
// --------------------------------------------------------------------
// Return true when the zero terminated string `s` can be written without
// quotes: the first character must satisfy isOrdinary, all others
// isNonBlank, and the text must not look like a reserved word (data_/save_
// at the start, or loop_/stop_/global_ anywhere, followed by more text).
bool isUnquotedString(const char *s)
{
    const char *const text = s;

    if (not isOrdinary(*s++))
        return false;

    for (; *s != 0; ++s)
    {
        if (not isNonBlank(*s))
            return false;
    }

    // but be careful it does not contain e.g. stop_
    static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.+)", std::regex_constants::icase);
    return not std::regex_match(text, reservedRx);
}
// --------------------------------------------------------------------
// Create a parser reading from `is`. When `init` is true the first token is
// read immediately so that mLookahead is ready for parseFile(); otherwise
// the caller is expected to position the stream first (see
// parseSingleDatablock).
SacParser::SacParser(std::istream &is, bool init)
    : mData(is)
{
    mValidate = true;
    mLineNr = 1;
    mBol = true;

    // Give these a defined value even when no token is read here, they were
    // left uninitialised for init == false.
    mTokenType = eCIFValueUnknown;
    mLookahead = eCIFTokenUnknown;

    if (init)
        mLookahead = getNextToken();
}
void
SacParser
::
error
(
const
std
::
string
&
msg
)
{
throw
parse_error
(
mLineNr
,
msg
);
}
// Return the next character: the top of the push-back buffer when that is
// not empty, otherwise the next character of the stream. A CR or CR/LF pair
// is returned as a single LF. The character is appended to mTokenValue and
// the line counter is incremented for each LF.
int SacParser::getNextChar()
{
    int result;

    if (not mBuffer.empty())
    {
        result = mBuffer.top();
        mBuffer.pop();
    }
    else
        result = mData.get();

    // very simple CR/LF translation into LF
    if (result == '\r')
    {
        const int lookahead = mData.get();
        if (lookahead != '\n')
            mBuffer.push(lookahead);
        result = '\n';
    }

    mTokenValue += static_cast<char>(result);

    if (result == '\n')
        ++mLineNr;

    if (VERBOSE >= 6)
    {
        std::cerr << "getNextChar => ";

        const bool printable = not(iscntrl(result) or not isprint(result));
        if (printable)
            std::cerr << char(result) << std::endl;
        else
            std::cerr << int(result) << std::endl;
    }

    return result;
}
// Undo the last getNextChar(): remove the last character from mTokenValue
// and push it on the push-back buffer, decrementing the line counter when it
// was a newline.
void SacParser::retract()
{
    assert(not mTokenValue.empty());

    const char last = mTokenValue.back();
    mTokenValue.pop_back();

    if (last == '\n')
        --mLineNr;

    mBuffer.push(last);
}
int
SacParser
::
restart
(
int
start
)
{
int
result
=
0
;
while
(
not
mTokenValue
.
empty
())
retract
();
switch
(
start
)
{
case
eStateStart
:
result
=
eStateFloat
;
break
;
case
eStateFloat
:
result
=
eStateInt
;
break
;
case
eStateInt
:
result
=
eStateValue
;
break
;
default
:
error
(
"Invalid state in SacParser"
);
}
mBol
=
false
;
return
result
;
}
// Check that the current lookahead token is `t` and advance to the next
// token. Reports a parse error naming both tokens when they differ.
void SacParser::match(SacParser::CIFToken t)
{
    if (mLookahead != t)
    {
        std::string message("Unexpected token, expected ");
        message += kTokenName[t];
        message += " but found ";
        message += kTokenName[mLookahead];
        error(message);
    }

    mLookahead = getNextToken();
}
// Read the next token from the input and return its kind. The text of the
// token is left in mTokenValue and, for values, its type in mTokenType.
//
// The tokenizer is a state machine with back-tracking: a token that is not
// white space, a comment, a tag, a text field or a quoted string is first
// tried as a float, then as an int and finally as a generic value; each
// failed attempt pushes the characters back through restart().
SacParser::CIFToken SacParser::getNextToken()
{
    const auto kEOF = std::char_traits<char>::eof();

    CIFToken result = eCIFTokenUnknown;
    int quoteChar = 0;
    int state = eStateStart, start = eStateStart;
    mBol = false;

    mTokenValue.clear();
    mTokenType = eCIFValueUnknown;

    // back-track and try the next interpretation of the token
    auto retry = [&]
    {
        state = start = restart(start);
    };

    // the character just read ends a value of the given type
    auto acceptValue = [&](auto type)
    {
        retract();
        result = eCIFTokenValue;
        mTokenType = type;
    };

    while (result == eCIFTokenUnknown)
    {
        auto ch = getNextChar();

        switch (state)
        {
            case eStateStart:
                if (ch == kEOF)
                    result = eCIFTokenEOF;
                else if (ch == '\n')
                {
                    mBol = true;
                    state = eStateWhite;
                }
                else if (ch == ' ' or ch == '\t')
                    state = eStateWhite;
                else if (ch == '#')
                    state = eStateComment;
                else if (ch == '_')
                    state = eStateTag;
                else if (ch == ';' and mBol)
                    state = eStateTextField;
                else if (ch == '\'' or ch == '"')
                {
                    quoteChar = ch;
                    state = eStateQuotedString;
                }
                else
                    retry();
                break;

            case eStateWhite:
                if (ch == kEOF)
                    result = eCIFTokenEOF;
                else if (not isspace(ch))
                {
                    state = eStateStart;
                    retract();
                    mTokenValue.clear();
                }
                else
                    mBol = (ch == '\n');
                break;

            case eStateComment:
                if (ch == '\n')
                {
                    state = eStateStart;
                    mBol = true;
                    mTokenValue.clear();
                }
                else if (ch == kEOF)
                    result = eCIFTokenEOF;
                else if (not isAnyPrint(ch))
                    error("invalid character in comment");
                break;

            // inside a line of a text field
            case eStateTextField:
                if (ch == '\n')
                    state = eStateTextField + 1;
                else if (ch == kEOF)
                    error("unterminated textfield");
                else if (not isAnyPrint(ch))
                    // reported, but deliberately not treated as an error
                    std::cerr << "invalid character in text field '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
                break;

            // at the start of a line inside a text field
            case eStateTextField + 1:
                if (isTextLead(ch) or ch == ' ' or ch == '\t')
                    state = eStateTextField;
                else if (ch == ';')
                {
                    assert(mTokenValue.length() >= 2);
                    // strip the leading ';' and the trailing newline + ';'
                    mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 3);
                    mTokenType = eCIFValueTextField;
                    result = eCIFTokenValue;
                }
                else if (ch == kEOF)
                    error("unterminated textfield");
                else if (ch != '\n')
                    error("invalid character in text field");
                break;

            case eStateQuotedString:
                if (ch == kEOF)
                    error("unterminated quoted string");
                else if (ch == quoteChar)
                    state = eStateQuotedStringQuote;
                else if (not isAnyPrint(ch))
                    // reported, but deliberately not treated as an error
                    std::cerr << "invalid character in quoted string '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
                break;

            // just seen the quote character inside a quoted string
            case eStateQuotedStringQuote:
                if (isWhite(ch))
                {
                    acceptValue(eCIFValueString);

                    if (mTokenValue.length() < 2)
                        error("Invalid quoted string token");

                    // strip the quotes
                    mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 2);
                }
                else if (ch == quoteChar)
                    ;
                else if (isAnyPrint(ch))
                    state = eStateQuotedString;
                else if (ch == kEOF)
                    error("unterminated quoted string");
                else
                    error("invalid character in quoted string");
                break;

            case eStateTag:
                if (not isNonBlank(ch))
                {
                    retract();
                    result = eCIFTokenTag;
                }
                break;

            case eStateFloat:
                if (ch == '+' or ch == '-')
                    state = eStateFloat + 1;
                else if (isdigit(ch))
                    state = eStateFloat + 1;
                else
                    retry();
                break;

            // NOTE(review): this state does not accept a further digit, so a
            // number with more than one leading digit falls through to the
            // generic value state. Left as is, confirm whether intended.
            case eStateFloat + 1:
                if (ch == '.')
                    state = eStateFloat + 2;
                else if (tolower(ch) == 'e')
                    state = eStateFloat + 3;
                else if (isWhite(ch) or ch == kEOF)
                    acceptValue(eCIFValueInt);
                else
                    retry();
                break;

            // parsed '.'
            case eStateFloat + 2:
                if (tolower(ch) == 'e')
                    state = eStateFloat + 3;
                else if (isWhite(ch) or ch == kEOF)
                    acceptValue(eCIFValueFloat);
                else
                    retry();
                break;

            // parsed 'e'
            case eStateFloat + 3:
                if (ch == '-' or ch == '+')
                    state = eStateFloat + 4;
                else if (isdigit(ch))
                    state = eStateFloat + 5;
                else
                    retry();
                break;

            case eStateFloat + 4:
                if (isdigit(ch))
                    state = eStateFloat + 5;
                else
                    retry();
                break;

            case eStateFloat + 5:
                if (isWhite(ch) or ch == kEOF)
                    acceptValue(eCIFValueFloat);
                else
                    retry();
                break;

            case eStateInt:
                if (isdigit(ch) or ch == '+' or ch == '-')
                    state = eStateInt + 1;
                else
                    retry();
                break;

            case eStateInt + 1:
                if (isWhite(ch) or ch == kEOF)
                    acceptValue(eCIFValueInt);
                else
                    retry();
                break;

            case eStateValue:
                if (ch == '_')
                {
                    // an underscore may complete one of the reserved words
                    std::string s = toLowerCopy(mTokenValue);

                    if (s == "global_")
                        result = eCIFTokenGLOBAL;
                    else if (s == "stop_")
                        result = eCIFTokenSTOP;
                    else if (s == "loop_")
                        result = eCIFTokenLOOP;
                    else if (s == "data_")
                    {
                        state = eStateDATA;
                        continue;
                    }
                    else if (s == "save_")
                    {
                        state = eStateSAVE;
                        continue;
                    }
                }

                if (result == eCIFTokenUnknown and not isNonBlank(ch))
                {
                    retract();
                    result = eCIFTokenValue;

                    if (mTokenValue == ".")
                        mTokenType = eCIFValueInapplicable;
                    else if (mTokenValue == "?")
                    {
                        mTokenType = eCIFValueUnknown;
                        mTokenValue.clear();
                    }
                }
                break;

            case eStateDATA:
            case eStateSAVE:
                if (not isNonBlank(ch))
                {
                    retract();

                    if (state == eStateDATA)
                        result = eCIFTokenDATA;
                    else
                        result = eCIFTokenSAVE;

                    // drop the "data_" or "save_" prefix, keep the name
                    mTokenValue.erase(mTokenValue.begin(), mTokenValue.begin() + 5);
                }
                break;

            default:
                assert(false);
                error("Invalid state in getNextToken");
                break;
        }
    }

    if (VERBOSE >= 5)
    {
        std::cerr << kTokenName[result];
        if (mTokenType != eCIFValueUnknown)
            std::cerr << ' ' << kValueName[mTokenType];
        if (result != eCIFTokenEOF)
            std::cerr << " '" << mTokenValue << '\'';
        std::cerr << std::endl;
    }

    return result;
}
// Scan the whole stream once and record, for every `data_<name>` header found
// outside comments, quoted strings and ';' text fields, the stream position
// (mData.tellg()) just after the white space that terminates the name.
//
// Returns the index mapping datablock name to that position.
DatablockIndex SacParser::indexDatablocks()
{
    DatablockIndex index;

    // first locate the start, as fast as we can
    auto &sb = *mData.rdbuf();

    enum
    {
        start,
        comment,
        string,
        string_quote,
        qstring,
        data,
        data_name
    } state = start;

    int quote = 0;
    bool bol = true;
    const char dblk[] = "data_";
    std::string::size_type si = 0;
    std::string datablock;

    for (auto ch = sb.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
    {
        switch (state)
        {
            case start:
                switch (ch)
                {
                    case '#': state = comment; break;
                    case 'd':
                    case 'D':
                        state = data;
                        si = 1; // first character of "data_" already consumed
                        break;
                    case '\'':
                    case '"':
                        state = string;
                        quote = ch;
                        break;
                    case ';':
                        if (bol)
                            state = qstring;
                        break;
                }
                break;

            case comment:
                if (ch == '\n')
                    state = start;
                break;

            case string:
                if (ch == quote)
                    state = string_quote;
                break;

            case string_quote:
                if (std::isspace(ch))
                    state = start;
                else
                    state = string;
                break;

            case qstring:
                if (ch == ';' and bol)
                    state = start;
                break;

            case data:
                if (dblk[si] == 0)
                {
                    // "data_" is complete: a non-blank starts the name,
                    // anything else means this was not a header. Never step
                    // past the terminating NUL of dblk.
                    if (isNonBlank(ch))
                    {
                        datablock = {static_cast<char>(ch)};
                        state = data_name;
                    }
                    else
                        state = start;
                }
                else if (dblk[si++] != ch)
                    state = start;
                break;

            case data_name:
                if (isNonBlank(ch))
                    datablock.push_back(static_cast<char>(ch));
                else
                {
                    // only a name terminated by white space is recorded
                    if (std::isspace(ch))
                        index[datablock] = mData.tellg();
                    state = start;
                }
                break;
        }

        bol = (ch == '\n');
    }

    return index;
}
// Scan the raw stream buffer for the header `data_<datablock>` and, when it is
// found, parse only that datablock.
//
// The scan is a light-weight state machine that skips comments, quoted
// strings and ';' text fields so that a `data_` inside those is not mistaken
// for a header.
//
// Returns true when the datablock was found (and parsed), false when the end
// of the stream was reached first.
bool SacParser::parseSingleDatablock(const std::string &datablock)
{
    // first locate the start, as fast as we can
    auto &sb = *mData.rdbuf();

    enum
    {
        start,
        comment,
        string,
        string_quote,
        qstring,
        data
    } state = start;

    int quote = 0;
    bool bol = true;
    const std::string dblk = "data_" + datablock;
    std::string::size_type si = 0;
    bool found = false;

    const auto eof = std::streambuf::traits_type::eof();

    for (auto ch = sb.sbumpc(); ch != eof; ch = sb.sbumpc())
    {
        switch (state)
        {
            case start:
                switch (ch)
                {
                    case '#': state = comment; break;
                    case 'd':
                    case 'D':
                        state = data;
                        si = 1; // first character of "data_" already consumed
                        break;
                    case '\'':
                    case '"':
                        state = string;
                        quote = ch;
                        break;
                    case ';':
                        if (bol)
                            state = qstring;
                        break;
                }
                break;

            case comment:
                if (ch == '\n')
                    state = start;
                break;

            case string:
                if (ch == quote)
                    state = string_quote;
                break;

            case string_quote:
                // a quote only closes the string when followed by white space
                if (std::isspace(ch))
                    state = start;
                else
                    state = string;
                break;

            case qstring:
                if (ch == ';' and bol)
                    state = start;
                break;

            case data:
                if (si == dblk.length())
                {
                    // complete header seen, it must be terminated by white space
                    if (std::isspace(ch))
                        found = true;
                    else
                        state = start;
                }
                else if (dblk[si++] != ch)
                    state = start;
                break;
        }

        // Leave the loop here: the for-increment would otherwise read (and
        // lose) one more character that belongs to the datablock content.
        if (found)
            break;

        bol = (ch == '\n');
    }

    if (found)
    {
        produceDatablock(datablock);
        mLookahead = getNextToken();
        parseDataBlock();
    }

    return found;
}
// Parse a single datablock using a previously built index: seek to the
// recorded position for `datablock` and parse from there.
// Returns false when the name is not present in `index`.
bool SacParser::parseSingleDatablock(const std::string &datablock, const DatablockIndex &index)
{
    const auto entry = index.find(datablock);
    if (entry == index.end())
        return false;

    mData.seekg(entry->second);

    produceDatablock(datablock);
    mLookahead = getNextToken();
    parseDataBlock();

    return true;
}
void
SacParser
::
parseFile
()
{
while
(
mLookahead
!=
eCIFTokenEOF
)
{
switch
(
mLookahead
)
{
case
eCIFTokenGLOBAL
:
parseGlobal
();
break
;
case
eCIFTokenDATA
:
produceDatablock
(
mTokenValue
);
match
(
eCIFTokenDATA
);
parseDataBlock
();
break
;
default
:
error
(
"This file does not seem to be an mmCIF file"
);
break
;
}
}
}
// Consume a global_ section: the keyword followed by any number of
// tag/value pairs. The pairs are matched but not passed to a produce method.
void SacParser::parseGlobal()
{
    match(eCIFTokenGLOBAL);

    for (;;)
    {
        if (mLookahead != eCIFTokenTag)
            break;

        match(eCIFTokenTag);
        match(eCIFTokenValue);
    }
}
void
SacParser
::
parseDataBlock
()
{
std
::
string
cat
;
while
(
mLookahead
==
eCIFTokenLOOP
or
mLookahead
==
eCIFTokenTag
or
mLookahead
==
eCIFTokenSAVE
)
{
switch
(
mLookahead
)
{
case
eCIFTokenLOOP
:
{
cat
.
clear
();
// should start a new category
match
(
eCIFTokenLOOP
);
std
::
vector
<
std
::
string
>
tags
;
while
(
mLookahead
==
eCIFTokenTag
)
{
std
::
string
catName
,
itemName
;
std
::
tie
(
catName
,
itemName
)
=
splitTagName
(
mTokenValue
);
if
(
cat
.
empty
())
{
produceCategory
(
catName
);
cat
=
catName
;
}
else
if
(
not
iequals
(
cat
,
catName
))
error
(
"inconsistent categories in loop_"
);
tags
.
push_back
(
itemName
);
match
(
eCIFTokenTag
);
}
while
(
mLookahead
==
eCIFTokenValue
)
{
produceRow
();
for
(
auto
tag
:
tags
)
{
produceItem
(
cat
,
tag
,
mTokenValue
);
match
(
eCIFTokenValue
);
}
}
cat
.
clear
();
break
;
}
case
eCIFTokenTag
:
{
std
::
string
catName
,
itemName
;
std
::
tie
(
catName
,
itemName
)
=
splitTagName
(
mTokenValue
);
if
(
not
iequals
(
cat
,
catName
))
{
produceCategory
(
catName
);
cat
=
catName
;
produceRow
();
}
match
(
eCIFTokenTag
);
produceItem
(
cat
,
itemName
,
mTokenValue
);
match
(
eCIFTokenValue
);
break
;
}
case
eCIFTokenSAVE
:
parseSaveFrame
();
break
;
default
:
assert
(
false
);
break
;
}
}
}
void
SacParser
::
parseSaveFrame
()
{
error
(
"A regular CIF file should not contain a save frame"
);
}
// --------------------------------------------------------------------
// Create a parser that reads from `is` and stores the result in `f`.
// `init` is passed on to SacParser. No datablock is current until
// produceDatablock() has been called.
Parser::Parser(std::istream &is, File &f, bool init)
    : SacParser(is, init)
    , mFile(f)
    , mDataBlock(nullptr)
{
}
// Create a new datablock called `name`, make it the current one and append
// it to the file.
// NOTE(review): presumably File::append takes ownership of the pointer,
// confirm.
void Parser::produceDatablock(const std::string &name)
{
    mDataBlock = new Datablock(name);
    mFile.append(mDataBlock);
}
// Start (or continue) category `name` in the current datablock and make it
// the current category.
void Parser::produceCategory(const std::string &name)
{
    if (VERBOSE >= 4)
    {
        std::cerr << "producing category " << name << std::endl;
    }

    std::tie(mCat, std::ignore) = mDataBlock->emplace(name);
}
// Append an empty row to the current category, make it the current row and
// record the current line number in it.
void Parser::produceRow()
{
    if (VERBOSE >= 4)
    {
        std::cerr << "producing row for category " << mCat->name() << std::endl;
    }

    mCat->emplace({});
    mRow = mCat->back();
    mRow.lineNr(mLineNr);
}
void
Parser
::
produceItem
(
const
std
::
string
&
category
,
const
std
::
string
&
item
,
const
std
::
string
&
value
)
{
if
(
VERBOSE
>=
4
)
std
::
cerr
<<
"producing _"
<<
category
<<
'.'
<<
item
<<
" -> "
<<
value
<<
std
::
endl
;
if
(
not
iequals
(
category
,
mCat
->
name
()))
error
(
"inconsistent categories in loop_"
);
mRow
[
item
]
=
mTokenValue
;
}
// --------------------------------------------------------------------
// Private data of DictParser, allocated in its constructor.
struct DictParserDataImpl
{
    // temporary values for constructing dictionaries

    // one entry for each category save frame parsed
    std::vector<ValidateCategory> mCategoryValidators;

    // item validators collected so far, keyed on category name
    std::map<std::string, std::vector<ValidateItem>> mItemValidators;

    std::set<std::tuple<std::string, std::string>> mLinkedItems;
};
// Create a dictionary parser reading from `is`. The parsed data goes into
// the parser's own mFile member; `validator` is kept by reference.
DictParser::DictParser(Validator &validator, std::istream &is)
    : Parser(is, mFile)
    , mValidator(validator)
    , mImpl(new DictParserDataImpl)
{
}
// Release the private data allocated in the constructor.
DictParser::~DictParser()
{
    delete mImpl;
}
void
DictParser
::
parseSaveFrame
()
{
if
(
not
mCollectedItemTypes
)
mCollectedItemTypes
=
collectItemTypes
();
std
::
string
saveFrameName
=
mTokenValue
;
if
(
saveFrameName
.
empty
())
error
(
"Invalid save frame, should contain more than just 'save_' here"
);
bool
isCategorySaveFrame
=
mTokenValue
[
0
]
!=
'_'
;
Datablock
dict
(
mTokenValue
);
Datablock
::
iterator
cat
=
dict
.
end
();
match
(
eCIFTokenSAVE
);
while
(
mLookahead
==
eCIFTokenLOOP
or
mLookahead
==
eCIFTokenTag
)
{
if
(
mLookahead
==
eCIFTokenLOOP
)
{
cat
=
dict
.
end
();
// should start a new category
match
(
eCIFTokenLOOP
);
std
::
vector
<
std
::
string
>
tags
;
while
(
mLookahead
==
eCIFTokenTag
)
{
std
::
string
catName
,
itemName
;
std
::
tie
(
catName
,
itemName
)
=
splitTagName
(
mTokenValue
);
if
(
cat
==
dict
.
end
())
std
::
tie
(
cat
,
std
::
ignore
)
=
dict
.
emplace
(
catName
);
else
if
(
not
iequals
(
cat
->
name
(),
catName
))
error
(
"inconsistent categories in loop_"
);
tags
.
push_back
(
itemName
);
match
(
eCIFTokenTag
);
}
while
(
mLookahead
==
eCIFTokenValue
)
{
cat
->
emplace
({});
auto
row
=
cat
->
back
();
for
(
auto
tag
:
tags
)
{
row
[
tag
]
=
mTokenValue
;
match
(
eCIFTokenValue
);
}
}
cat
=
dict
.
end
();
}
else
{
std
::
string
catName
,
itemName
;
std
::
tie
(
catName
,
itemName
)
=
splitTagName
(
mTokenValue
);
if
(
cat
==
dict
.
end
()
or
not
iequals
(
cat
->
name
(),
catName
))
std
::
tie
(
cat
,
std
::
ignore
)
=
dict
.
emplace
(
catName
);
match
(
eCIFTokenTag
);
if
(
cat
->
empty
())
cat
->
emplace
({});
cat
->
back
()[
itemName
]
=
mTokenValue
;
match
(
eCIFTokenValue
);
}
}
match
(
eCIFTokenSAVE
);
if
(
isCategorySaveFrame
)
{
std
::
string
category
;
cif
::
tie
(
category
)
=
dict
[
"category"
].
front
().
get
(
"id"
);
std
::
vector
<
std
::
string
>
keys
;
for
(
auto
k
:
dict
[
"category_key"
])
keys
.
push_back
(
std
::
get
<
1
>
(
splitTagName
(
k
[
"name"
].
as
<
std
::
string
>
())));
iset
groups
;
for
(
auto
g
:
dict
[
"category_group"
])
groups
.
insert
(
g
[
"id"
].
as
<
std
::
string
>
());
mImpl
->
mCategoryValidators
.
push_back
(
ValidateCategory
{
category
,
keys
,
groups
});
}
else
{
// if the type code is missing, this must be a pointer, just skip it
std
::
string
typeCode
;
cif
::
tie
(
typeCode
)
=
dict
[
"item_type"
].
front
().
get
(
"code"
);
const
ValidateType
*
tv
=
nullptr
;
if
(
not
(
typeCode
.
empty
()
or
typeCode
==
"?"
))
tv
=
mValidator
.
getValidatorForType
(
typeCode
);
iset
ess
;
for
(
auto
e
:
dict
[
"item_enumeration"
])
ess
.
insert
(
e
[
"value"
].
as
<
std
::
string
>
());
std
::
string
defaultValue
;
cif
::
tie
(
defaultValue
)
=
dict
[
"item_default"
].
front
().
get
(
"value"
);
bool
defaultIsNull
=
false
;
if
(
defaultValue
.
empty
())
{
for
(
auto
&
r
:
dict
[
"_item_default"
])
{
defaultIsNull
=
r
[
"value"
].
is_null
();
break
;
}
}
// collect the dict from our dataBlock and construct validators
for
(
auto
i
:
dict
[
"item"
])
{
std
::
string
tagName
,
category
,
mandatory
;
cif
::
tie
(
tagName
,
category
,
mandatory
)
=
i
.
get
(
"name"
,
"category_id"
,
"mandatory_code"
);
std
::
string
catName
,
itemName
;
std
::
tie
(
catName
,
itemName
)
=
splitTagName
(
tagName
);
if
(
catName
.
empty
()
or
itemName
.
empty
())
error
(
"Invalid tag name in _item.name "
+
tagName
);
if
(
not
iequals
(
category
,
catName
)
and
not
(
category
.
empty
()
or
category
==
"?"
))
error
(
"specified category id does match the implicit category name for tag '"
+
tagName
+
'\''
);
else
category
=
catName
;
auto
&
ivs
=
mImpl
->
mItemValidators
[
category
];
auto
vi
=
find
(
ivs
.
begin
(),
ivs
.
end
(),
ValidateItem
{
itemName
});
if
(
vi
==
ivs
.
end
())
ivs
.
push_back
(
ValidateItem
{
itemName
,
iequals
(
mandatory
,
"yes"
),
tv
,
ess
,
defaultValue
,
defaultIsNull
});
else
{
// need to update the itemValidator?
if
(
vi
->
mMandatory
!=
(
iequals
(
mandatory
,
"yes"
)))
{
if
(
VERBOSE
>
2
)
{
std
::
cerr
<<
"inconsistent mandatory value for "
<<
tagName
<<
" in dictionary"
<<
std
::
endl
;
if
(
iequals
(
tagName
,
saveFrameName
))
std
::
cerr
<<
"choosing "
<<
mandatory
<<
std
::
endl
;
else
std
::
cerr
<<
"choosing "
<<
(
vi
->
mMandatory
?
"Y"
:
"N"
)
<<
std
::
endl
;
}
if
(
iequals
(
tagName
,
saveFrameName
))
vi
->
mMandatory
=
(
iequals
(
mandatory
,
"yes"
));
}
if
(
vi
->
mType
!=
nullptr
and
tv
!=
nullptr
and
vi
->
mType
!=
tv
)
{
if
(
VERBOSE
>
1
)
std
::
cerr
<<
"inconsistent type for "
<<
tagName
<<
" in dictionary"
<<
std
::
endl
;
}
// vi->mMandatory = (iequals(mandatory, "yes"));
if
(
vi
->
mType
==
nullptr
)
vi
->
mType
=
tv
;
vi
->
mEnums
.
insert
(
ess
.
begin
(),
ess
.
end
());
// anything else yet?
// ...
}
}
// collect the dict from our dataBlock and construct validators
for
(
auto
i
:
dict
[
"item_linked"
])
{
std
::
string
childTagName
,
parentTagName
;
cif
::
tie
(
childTagName
,
parentTagName
)
=
i
.
get
(
"child_name"
,
"parent_name"
);
mImpl
->
mLinkedItems
.
emplace
(
childTagName
,
parentTagName
);
}
}
}
void
DictParser
::
linkItems
()
{
if
(
not
mDataBlock
)
error
(
"no datablock"
);
auto
&
dict
=
*
mDataBlock
;
// links are identified by a parent category, a child category and a group ID
using
key_type
=
std
::
tuple
<
std
::
string
,
std
::
string
,
int
>
;
std
::
map
<
key_type
,
size_t
>
linkIndex
;
// Each link group consists of a set of keys
std
::
vector
<
std
::
tuple
<
std
::
vector
<
std
::
string
>
,
std
::
vector
<
std
::
string
>>>
linkKeys
;
auto
addLink
=
[
&
](
size_t
ix
,
const
std
::
string
&
pk
,
const
std
::
string
&
ck
)
{
auto
&&
[
pkeys
,
ckeys
]
=
linkKeys
.
at
(
ix
);
bool
found
=
false
;
for
(
size_t
i
=
0
;
i
<
pkeys
.
size
();
++
i
)
{
if
(
pkeys
[
i
]
==
pk
and
ckeys
[
i
]
==
ck
)
{
found
=
true
;
break
;
}
}
if
(
not
found
)
{
pkeys
.
push_back
(
pk
);
ckeys
.
push_back
(
ck
);
}
};
auto
&
linkedGroupList
=
dict
[
"pdbx_item_linked_group_list"
];
for
(
auto
gl
:
linkedGroupList
)
{
std
::
string
child
,
parent
;
int
link_group_id
;
cif
::
tie
(
child
,
parent
,
link_group_id
)
=
gl
.
get
(
"child_name"
,
"parent_name"
,
"link_group_id"
);
auto
civ
=
mValidator
.
getValidatorForItem
(
child
);
if
(
civ
==
nullptr
)
error
(
"in pdbx_item_linked_group_list, item '"
+
child
+
"' is not specified"
);
auto
piv
=
mValidator
.
getValidatorForItem
(
parent
);
if
(
piv
==
nullptr
)
error
(
"in pdbx_item_linked_group_list, item '"
+
parent
+
"' is not specified"
);
key_type
key
{
piv
->
mCategory
->
mName
,
civ
->
mCategory
->
mName
,
link_group_id
};
if
(
not
linkIndex
.
count
(
key
))
{
linkIndex
[
key
]
=
linkKeys
.
size
();
linkKeys
.
push_back
({});
}
size_t
ix
=
linkIndex
.
at
(
key
);
addLink
(
ix
,
piv
->
mTag
,
civ
->
mTag
);
}
// Only process inline linked items if the linked group list is absent
if
(
linkedGroupList
.
empty
())
{
// for links recorded in categories but not in pdbx_item_linked_group_list
for
(
auto
li
:
mImpl
->
mLinkedItems
)
{
std
::
string
child
,
parent
;
std
::
tie
(
child
,
parent
)
=
li
;
auto
civ
=
mValidator
.
getValidatorForItem
(
child
);
if
(
civ
==
nullptr
)
error
(
"in pdbx_item_linked_group_list, item '"
+
child
+
"' is not specified"
);
auto
piv
=
mValidator
.
getValidatorForItem
(
parent
);
if
(
piv
==
nullptr
)
error
(
"in pdbx_item_linked_group_list, item '"
+
parent
+
"' is not specified"
);
key_type
key
{
piv
->
mCategory
->
mName
,
civ
->
mCategory
->
mName
,
0
};
if
(
not
linkIndex
.
count
(
key
))
{
linkIndex
[
key
]
=
linkKeys
.
size
();
linkKeys
.
push_back
({});
}
size_t
ix
=
linkIndex
.
at
(
key
);
addLink
(
ix
,
piv
->
mTag
,
civ
->
mTag
);
}
}
auto
&
linkedGroup
=
dict
[
"pdbx_item_linked_group"
];
// now store the links in the validator
for
(
auto
&
kv
:
linkIndex
)
{
ValidateLink
link
=
{};
std
::
tie
(
link
.
mParentCategory
,
link
.
mChildCategory
,
link
.
mLinkGroupID
)
=
kv
.
first
;
std
::
tie
(
link
.
mParentKeys
,
link
.
mChildKeys
)
=
linkKeys
[
kv
.
second
];
// look up the label
for
(
auto
r
:
linkedGroup
.
find
(
cif
::
Key
(
"category_id"
)
==
link
.
mChildCategory
and
cif
::
Key
(
"link_group_id"
)
==
link
.
mLinkGroupID
))
{
link
.
mLinkGroupLabel
=
r
[
"label"
].
as
<
std
::
string
>
();
break
;
}
mValidator
.
addLinkValidator
(
std
::
move
(
link
));
}
// now make sure the itemType is specified for all itemValidators
for
(
auto
&
cv
:
mValidator
.
mCategoryValidators
)
{
for
(
auto
&
iv
:
cv
.
mItemValidators
)
{
if
(
iv
.
mType
==
nullptr
and
cif
::
VERBOSE
>=
0
)
std
::
cerr
<<
"Missing item_type for "
<<
iv
.
mTag
<<
std
::
endl
;
}
}
}
void
DictParser
::
loadDictionary
()
{
std
::
unique_ptr
<
Datablock
>
dict
;
Datablock
*
savedDatablock
=
mDataBlock
;
try
{
while
(
mLookahead
!=
eCIFTokenEOF
)
{
switch
(
mLookahead
)
{
case
eCIFTokenGLOBAL
:
parseGlobal
();
break
;
default
:
{
dict
.
reset
(
new
Datablock
(
mTokenValue
));
// dummy datablock, for constructing the validator only
mDataBlock
=
dict
.
get
();
match
(
eCIFTokenDATA
);
parseDataBlock
();
break
;
}
}
}
}
catch
(
const
std
::
exception
&
)
{
if
(
cif
::
VERBOSE
>=
0
)
std
::
cerr
<<
"Error parsing dictionary"
<<
std
::
endl
;
throw
;
}
// store all validators
for
(
auto
&
ic
:
mImpl
->
mCategoryValidators
)
mValidator
.
addCategoryValidator
(
std
::
move
(
ic
));
mImpl
->
mCategoryValidators
.
clear
();
for
(
auto
&
iv
:
mImpl
->
mItemValidators
)
{
auto
cv
=
mValidator
.
getValidatorForCategory
(
iv
.
first
);
if
(
cv
==
nullptr
)
error
(
"Undefined category '"
+
iv
.
first
);
for
(
auto
&
v
:
iv
.
second
)
const_cast
<
ValidateCategory
*>
(
cv
)
->
addItemValidator
(
std
::
move
(
v
));
}
// check all item validators for having a typeValidator
if
(
dict
)
linkItems
();
// store meta information
Datablock
::
iterator
info
;
bool
n
;
std
::
tie
(
info
,
n
)
=
mDataBlock
->
emplace
(
"dictionary"
);
if
(
n
)
{
auto
r
=
info
->
front
();
mValidator
.
dictName
(
r
[
"title"
].
as
<
std
::
string
>
());
mValidator
.
dictVersion
(
r
[
"version"
].
as
<
std
::
string
>
());
}
mDataBlock
=
savedDatablock
;
mImpl
->
mItemValidators
.
clear
();
}
bool
DictParser
::
collectItemTypes
()
{
bool
result
=
false
;
if
(
not
mDataBlock
)
error
(
"no datablock"
);
auto
&
dict
=
*
mDataBlock
;
for
(
auto
&
t
:
dict
[
"item_type_list"
])
{
std
::
string
code
,
primitiveCode
,
construct
;
cif
::
tie
(
code
,
primitiveCode
,
construct
)
=
t
.
get
(
"code"
,
"primitive_code"
,
"construct"
);
ba
::
replace_all
(
construct
,
"
\\
n"
,
"
\n
"
);
ba
::
replace_all
(
construct
,
"
\\
t"
,
"
\t
"
);
ba
::
replace_all
(
construct
,
"
\\\n
"
,
""
);
try
{
ValidateType
v
=
{
code
,
mapToPrimitiveType
(
primitiveCode
),
boost
::
regex
(
construct
,
boost
::
regex
::
extended
|
boost
::
regex
::
optimize
)};
mValidator
.
addTypeValidator
(
std
::
move
(
v
));
}
catch
(
const
std
::
exception
&
)
{
throw_with_nested
(
parse_error
(
t
.
lineNr
(),
"error in regular expression"
));
}
// Do not replace an already defined type validator, this won't work with pdbx_v40
// as it has a name that is too strict for its own names :-)
// if (mFileImpl.mTypeValidators.count(v))
// mFileImpl.mTypeValidators.erase(v);
if
(
VERBOSE
>=
5
)
std
::
cerr
<<
"Added type "
<<
code
<<
" ("
<<
primitiveCode
<<
") => "
<<
construct
<<
std
::
endl
;
result
=
true
;
}
return
result
;
}
}
// namespace cif
test/unit-v2-test.cpp
View file @
24fa80ba
...
...
@@ -35,6 +35,8 @@
// #include <cif++/CifValidator.hpp>
// #include <cif++/CifParser.hpp>
#include <cif++/v2/parser.hpp>
namespace
tt
=
boost
::
test_tools
;
std
::
filesystem
::
path
gTestDir
=
std
::
filesystem
::
current_path
();
// filled in first test
...
...
@@ -264,45 +266,54 @@ BOOST_AUTO_TEST_CASE(ci_1)
// --------------------------------------------------------------------
// Parse a small CIF document with a single loop_ from a string literal and
// verify the data block name and the typed content of every row in the
// "test" category.
BOOST_AUTO_TEST_CASE(f_1)
{
	auto f = R"(data_TEST
#
loop_
_test.id
_test.name
1 aap
2 noot
3 mies
)"_cf;

	// These are preconditions for f.front() below, so they must stop the test
	// case when they fail. BOOST_ASSERT is not a Boost.Test assertion and is
	// disabled when NDEBUG is defined, hence BOOST_REQUIRE.
	BOOST_REQUIRE(not f.empty());
	BOOST_REQUIRE(f.size() == 1);

	auto &db = f.front();

	BOOST_CHECK(db.name() == "TEST");

	auto &test = db["test"];

	const char *ts[] = {"aap", "noot", "mies"};

	// The loop below indexes ts[] by row number; a wrong row count must abort
	// the test case instead of reading past the end of the array.
	BOOST_REQUIRE(test.size() == 3);

	int n = 1;
	for (const auto &[i, s] : test.rows<int, std::string>("id", "name"))
	{
		BOOST_CHECK_EQUAL(i, n);
		BOOST_CHECK_EQUAL(s.compare(ts[n - 1]), 0);
		++n;
	}

	// Erasing rows while iterating over the category, as in
	//     for (auto r : test) test.erase(r);
	// was noted by the author to crash; don't do that.
}
// // --------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment