Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
libcifpp
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
libcifpp
Commits
32f4749d
Unverified
Commit
32f4749d
authored
Jun 07, 2023
by
Maarten L. Hekkelman
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
faster cif parser
parent
da12be87
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
96 additions
and
166 deletions
+96
-166
CMakeLists.txt
+1
-1
changelog
+4
-0
include/cif++/parser.hpp
+13
-16
include/cif++/validate.hpp
+2
-1
src/parser.cpp
+33
-143
src/validate.cpp
+2
-2
test/io-test.cpp
+40
-0
test/unit-v2-test.cpp
+1
-3
No files found.
CMakeLists.txt
View file @
32f4749d
...
@@ -25,7 +25,7 @@
...
@@ -25,7 +25,7 @@
cmake_minimum_required
(
VERSION 3.16
)
cmake_minimum_required
(
VERSION 3.16
)
# set the project name
# set the project name
project
(
cifpp VERSION 5.0.
9
LANGUAGES CXX
)
project
(
cifpp VERSION 5.0.
10
LANGUAGES CXX
)
list
(
PREPEND CMAKE_MODULE_PATH
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/cmake"
)
list
(
PREPEND CMAKE_MODULE_PATH
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/cmake"
)
...
...
changelog
View file @
32f4749d
Version
5.0.10
-
Fix
in
progress_bar
,
was
using
too
much
CPU
-
Optimised
mmCIF
parser
Version
5.0.9
Version
5.0.9
-
Fix
in
dihedral
angle
calculations
-
Fix
in
dihedral
angle
calculations
-
Added
create_water
to
model
-
Added
create_water
to
model
...
...
include/cif++/parser.hpp
View file @
32f4749d
...
@@ -63,9 +63,14 @@ class sac_parser
...
@@ -63,9 +63,14 @@ class sac_parser
kAnyPrintMask
=
1
<<
3
kAnyPrintMask
=
1
<<
3
};
};
static
bool
is_whit
e
(
int
ch
)
static
constexpr
bool
is_spac
e
(
int
ch
)
{
{
return
std
::
isspace
(
ch
)
or
ch
==
'#'
;
return
ch
==
' '
or
ch
==
'\t'
or
ch
==
'\r'
or
ch
==
'\n'
;
}
static
constexpr
bool
is_white
(
int
ch
)
{
return
is_space
(
ch
)
or
ch
==
'#'
;
}
}
static
constexpr
bool
is_ordinary
(
int
ch
)
static
constexpr
bool
is_ordinary
(
int
ch
)
...
@@ -136,15 +141,13 @@ class sac_parser
...
@@ -136,15 +141,13 @@ class sac_parser
}
}
}
}
// get_next_char takes a char from the buffer, or if it is empty
// get_next_char takes the next character from the istream.
// from the istream. This function also does carriage/linefeed
// This function also does carriage/linefeed translation.
// translation.
int
get_next_char
();
int
get_next_char
();
// Put the last read character back into the istream
void
retract
();
void
retract
();
int
restart
(
int
start
);
CIFToken
get_next_token
();
CIFToken
get_next_token
();
void
match
(
CIFToken
token
);
void
match
(
CIFToken
token
);
...
@@ -191,7 +194,7 @@ class sac_parser
...
@@ -191,7 +194,7 @@ class sac_parser
protected
:
protected
:
enum
State
enum
class
State
{
{
Start
,
Start
,
White
,
White
,
...
@@ -204,9 +207,8 @@ class sac_parser
...
@@ -204,9 +207,8 @@ class sac_parser
UnquotedString
,
UnquotedString
,
Tag
,
Tag
,
TextField
,
TextField
,
Float
=
100
,
TextFieldNL
,
Int
=
110
,
Reserved
,
Reserved
=
300
,
Value
Value
};
};
...
@@ -217,11 +219,6 @@ class sac_parser
...
@@ -217,11 +219,6 @@ class sac_parser
bool
m_bol
;
bool
m_bol
;
CIFToken
m_lookahead
;
CIFToken
m_lookahead
;
static
constexpr
size_t
kRetractBufferSize
=
128
;
int
m_retract_buffer
[
kRetractBufferSize
];
int
*
m_retract_buffer_ptr
=
m_retract_buffer
;
// token buffer
// token buffer
std
::
vector
<
char
>
m_token_buffer
;
std
::
vector
<
char
>
m_token_buffer
;
std
::
string_view
m_token_value
;
std
::
string_view
m_token_value
;
...
...
include/cif++/validate.hpp
View file @
32f4749d
...
@@ -228,8 +228,9 @@ class validator_factory
...
@@ -228,8 +228,9 @@ class validator_factory
const
validator
&
operator
[](
std
::
string_view
dictionary_name
);
const
validator
&
operator
[](
std
::
string_view
dictionary_name
);
const
validator
&
construct_validator
(
std
::
string_view
name
,
std
::
istream
&
is
);
private
:
private
:
void
construct_validator
(
std
::
string_view
name
,
std
::
istream
&
is
);
// --------------------------------------------------------------------
// --------------------------------------------------------------------
...
...
src/parser.cpp
View file @
32f4749d
...
@@ -222,29 +222,25 @@ bool sac_parser::is_unquoted_string(std::string_view text)
...
@@ -222,29 +222,25 @@ bool sac_parser::is_unquoted_string(std::string_view text)
// translation.
// translation.
int
sac_parser
::
get_next_char
()
int
sac_parser
::
get_next_char
()
{
{
int
result
;
int
result
=
m_source
.
sbumpc
()
;
if
(
m_retract_buffer_ptr
==
m_retract_buffer
)
if
(
result
==
std
::
char_traits
<
char
>::
eof
()
)
result
=
m_source
.
sbumpc
(
);
m_token_buffer
.
push_back
(
0
);
else
else
result
=
*--
m_retract_buffer_ptr
;
{
// very simple CR/LF translation into LF
if
(
result
==
'\r'
)
if
(
result
==
'\r'
)
{
{
int
lookahead
=
m_source
.
sbumpc
();
if
(
m_source
.
sgetc
()
==
'\n'
)
if
(
lookahead
!=
'\n'
)
m_source
.
sbumpc
();
*
m_retract_buffer_ptr
++
=
lookahead
;
++
m_line_nr
;
result
=
'\n'
;
result
=
'\n'
;
}
}
else
if
(
result
==
'\n'
)
++
m_line_nr
;
if
(
result
==
std
::
char_traits
<
char
>::
eof
())
m_token_buffer
.
push_back
(
0
);
else
m_token_buffer
.
push_back
(
std
::
char_traits
<
char
>::
to_char_type
(
result
));
m_token_buffer
.
push_back
(
std
::
char_traits
<
char
>::
to_char_type
(
result
));
}
if
(
result
==
'\n'
)
++
m_line_nr
;
return
result
;
return
result
;
}
}
...
@@ -257,45 +253,16 @@ void sac_parser::retract()
...
@@ -257,45 +253,16 @@ void sac_parser::retract()
if
(
ch
==
'\n'
)
if
(
ch
==
'\n'
)
--
m_line_nr
;
--
m_line_nr
;
if
(
m_retract_buffer_ptr
==
m_retract_buffer
+
kRetractBufferSize
)
if
(
ch
!=
0
)
throw
cif
::
parse_error
(
m_line_nr
,
"Buffer overflow"
);
*
m_retract_buffer_ptr
++
=
ch
==
0
?
std
::
char_traits
<
char
>::
eof
()
:
std
::
char_traits
<
char
>::
to_int_type
(
ch
);
m_token_buffer
.
pop_back
();
}
int
sac_parser
::
restart
(
int
start
)
{
int
result
=
0
;
while
(
not
m_token_buffer
.
empty
())
retract
();
switch
(
start
)
{
{
case
State
:
:
Start
:
// since we always putback at most a single character,
result
=
State
::
Float
;
// the test below should never fail.
break
;
case
State
:
:
Float
:
result
=
State
::
Int
;
break
;
case
State
:
:
Int
:
result
=
State
::
Value
;
break
;
case
State
:
:
Reserved
:
if
(
m_source
.
sputbackc
(
ch
)
==
std
::
char_traits
<
char
>::
eof
())
result
=
State
::
Value
;
throw
std
::
runtime_error
(
"putback failure"
);
break
;
default
:
error
(
"Invalid state in SacParser"
);
}
}
m_bol
=
false
;
m_token_buffer
.
pop_back
();
return
result
;
}
}
sac_parser
::
CIFToken
sac_parser
::
get_next_token
()
sac_parser
::
CIFToken
sac_parser
::
get_next_token
()
...
@@ -304,7 +271,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -304,7 +271,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
CIFToken
result
=
CIFToken
::
Unknown
;
CIFToken
result
=
CIFToken
::
Unknown
;
int
quoteChar
=
0
;
int
quoteChar
=
0
;
int
state
=
State
::
Start
,
start
=
State
::
Start
;
State
state
=
State
::
Start
;
m_bol
=
false
;
m_bol
=
false
;
m_token_buffer
.
clear
();
m_token_buffer
.
clear
();
...
@@ -344,13 +311,13 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -344,13 +311,13 @@ sac_parser::CIFToken sac_parser::get_next_token()
else
if
(
dag
.
move
(
ch
)
==
reserved_words_automaton
::
undefined
)
else
if
(
dag
.
move
(
ch
)
==
reserved_words_automaton
::
undefined
)
state
=
State
::
Reserved
;
state
=
State
::
Reserved
;
else
else
state
=
start
=
restart
(
start
)
;
state
=
State
::
Value
;
break
;
break
;
case
State
:
:
White
:
case
State
:
:
White
:
if
(
ch
==
kEOF
)
if
(
ch
==
kEOF
)
result
=
CIFToken
::
Eof
;
result
=
CIFToken
::
Eof
;
else
if
(
not
isspace
(
ch
))
else
if
(
not
is
_
space
(
ch
))
{
{
state
=
State
::
Start
;
state
=
State
::
Start
;
retract
();
retract
();
...
@@ -380,19 +347,19 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -380,19 +347,19 @@ sac_parser::CIFToken sac_parser::get_next_token()
result
=
CIFToken
::
Value
;
result
=
CIFToken
::
Value
;
}
}
else
else
state
=
start
=
restart
(
start
)
;
state
=
State
::
Value
;
break
;
break
;
case
State
:
:
TextField
:
case
State
:
:
TextField
:
if
(
ch
==
'\n'
)
if
(
ch
==
'\n'
)
state
=
State
::
TextField
+
1
;
state
=
State
::
TextField
NL
;
else
if
(
ch
==
kEOF
)
else
if
(
ch
==
kEOF
)
error
(
"unterminated textfield"
);
error
(
"unterminated textfield"
);
else
if
(
not
is_any_print
(
ch
)
and
cif
::
VERBOSE
>
2
)
else
if
(
not
is_any_print
(
ch
)
and
cif
::
VERBOSE
>
2
)
warning
(
"invalid character in text field '"
+
std
::
string
({
static_cast
<
char
>
(
ch
)})
+
"' ("
+
std
::
to_string
((
int
)
ch
)
+
")"
);
warning
(
"invalid character in text field '"
+
std
::
string
({
static_cast
<
char
>
(
ch
)})
+
"' ("
+
std
::
to_string
((
int
)
ch
)
+
")"
);
break
;
break
;
case
State
:
:
TextField
+
1
:
case
State
:
:
TextField
NL
:
if
(
is_text_lead
(
ch
)
or
ch
==
' '
or
ch
==
'\t'
)
if
(
is_text_lead
(
ch
)
or
ch
==
' '
or
ch
==
'\t'
)
state
=
State
::
TextField
;
state
=
State
::
TextField
;
else
if
(
ch
==
';'
)
else
if
(
ch
==
';'
)
...
@@ -445,98 +412,21 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -445,98 +412,21 @@ sac_parser::CIFToken sac_parser::get_next_token()
}
}
break
;
break
;
case
State
:
:
Float
:
case
State
:
:
Reserved
:
if
(
ch
==
'+'
or
ch
==
'-'
)
switch
(
dag
.
move
(
ch
))
state
=
State
::
Float
+
1
;
else
if
((
ch
>=
'0'
and
ch
<=
'9'
))
state
=
State
::
Float
+
1
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Float
+
1
:
if
(
ch
==
'.'
)
state
=
State
::
Float
+
2
;
else
if
((
ch
&
~
0x20
)
==
'E'
)
state
=
State
::
Float
+
3
;
else
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
{
retract
();
result
=
CIFToken
::
Value
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
else
state
=
start
=
restart
(
start
);
break
;
// parsed '.'
case
State
:
:
Float
+
2
:
if
((
ch
&
~
0x20
)
==
'E'
)
state
=
State
::
Float
+
3
;
else
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
{
retract
();
result
=
CIFToken
::
Value
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
else
state
=
start
=
restart
(
start
);
break
;
// parsed 'e'
case
State
:
:
Float
+
3
:
if
(
ch
==
'-'
or
ch
==
'+'
)
state
=
State
::
Float
+
4
;
else
if
((
ch
>=
'0'
and
ch
<=
'9'
))
state
=
State
::
Float
+
5
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Float
+
4
:
if
((
ch
>=
'0'
and
ch
<=
'9'
))
state
=
State
::
Float
+
5
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Float
+
5
:
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
{
{
retract
();
case
reserved_words_automaton
:
:
undefined
:
result
=
CIFToken
::
Value
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Int
:
if
((
ch
>=
'0'
and
ch
<=
'9'
)
or
ch
==
'+'
or
ch
==
'-'
)
state
=
State
::
Int
+
1
;
else
state
=
start
=
restart
(
start
);
break
;
break
;
case
State
:
:
Int
+
1
:
case
reserved_words_automaton
:
:
no_keyword
:
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
if
(
not
is_non_blank
(
ch
)
)
{
{
retract
();
retract
();
result
=
CIFToken
::
Value
;
result
=
CIFToken
::
Value
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
}
else
else
state
=
start
=
restart
(
start
);
state
=
State
::
Value
;
break
;
case
State
:
:
Reserved
:
switch
(
dag
.
move
(
ch
))
{
case
reserved_words_automaton
:
:
undefined
:
break
;
case
reserved_words_automaton
:
:
no_keyword
:
state
=
start
=
restart
(
start
);
break
;
break
;
case
reserved_words_automaton
:
:
data
:
case
reserved_words_automaton
:
:
data
:
...
@@ -664,7 +554,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
...
@@ -664,7 +554,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
break
;
break
;
case
string_quote
:
case
string_quote
:
if
(
std
::
is
space
(
ch
))
if
(
is_
space
(
ch
))
state
=
start
;
state
=
start
;
else
else
state
=
string
;
state
=
string
;
...
@@ -676,7 +566,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
...
@@ -676,7 +566,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
break
;
break
;
case
data
:
case
data
:
if
(
isspace
(
ch
)
and
dblk
[
si
]
==
0
)
if
(
is
_
space
(
ch
)
and
dblk
[
si
]
==
0
)
found
=
true
;
found
=
true
;
else
if
(
dblk
[
si
++
]
!=
ch
)
else
if
(
dblk
[
si
++
]
!=
ch
)
state
=
start
;
state
=
start
;
...
@@ -754,7 +644,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
...
@@ -754,7 +644,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
break
;
break
;
case
string_quote
:
case
string_quote
:
if
(
std
::
is
space
(
ch
))
if
(
is_
space
(
ch
))
state
=
start
;
state
=
start
;
else
else
state
=
string
;
state
=
string
;
...
@@ -778,7 +668,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
...
@@ -778,7 +668,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
case
data_name
:
case
data_name
:
if
(
is_non_blank
(
ch
))
if
(
is_non_blank
(
ch
))
datablock
.
insert
(
datablock
.
end
(),
char
(
ch
));
datablock
.
insert
(
datablock
.
end
(),
char
(
ch
));
else
if
(
isspace
(
ch
))
else
if
(
is
_
space
(
ch
))
{
{
if
(
not
datablock
.
empty
())
if
(
not
datablock
.
empty
())
index
[
datablock
]
=
m_source
.
pubseekoff
(
0
,
std
::
ios_base
::
cur
,
std
::
ios_base
::
in
);
index
[
datablock
]
=
m_source
.
pubseekoff
(
0
,
std
::
ios_base
::
cur
,
std
::
ios_base
::
in
);
...
...
src/validate.cpp
View file @
32f4749d
...
@@ -491,9 +491,9 @@ const validator &validator_factory::operator[](std::string_view dictionary_name)
...
@@ -491,9 +491,9 @@ const validator &validator_factory::operator[](std::string_view dictionary_name)
}
}
}
}
void
validator_factory
::
construct_validator
(
std
::
string_view
name
,
std
::
istream
&
is
)
const
validator
&
validator_factory
::
construct_validator
(
std
::
string_view
name
,
std
::
istream
&
is
)
{
{
m_validators
.
emplace_back
(
parse_dictionary
(
name
,
is
));
return
m_validators
.
emplace_back
(
parse_dictionary
(
name
,
is
));
}
}
}
// namespace cif
}
// namespace cif
test/io-test.cpp
0 → 100644
View file @
32f4749d
#include <cif++.hpp>

#include <iostream>
// A sac_parser whose produce_* callbacks are all empty: every event reported
// by the base class is discarded.
// NOTE(review): presumably this exists to run the parser without building any
// data structure (e.g. for timing it) -- confirm with the author.
class dummy_parser : public cif::sac_parser
{
  public:
	// Forwards the input stream to the sac_parser base class.
	dummy_parser(std::istream &is)
		: sac_parser(is)
	{
	}

	// Ignores the datablock event.
	void produce_datablock(std::string_view /*name*/) override {}

	// Ignores the category event.
	void produce_category(std::string_view /*name*/) override {}

	// Ignores the row event.
	void produce_row() override {}

	// Ignores the item event.
	void produce_item(std::string_view /*category*/, std::string_view /*item*/, std::string_view /*value*/) override {}
};
int
main
()
{
cif
::
gzio
::
ifstream
in
(
"/srv/data/pdb/mmCIF/gl/8glv.cif.gz"
);
dummy_parser
parser
(
in
);
parser
.
parse_file
();
// cif::file f("/srv/data/pdb/mmCIF/gl/8glv.cif.gz");
return
0
;
}
\ No newline at end of file
test/unit-v2-test.cpp
View file @
32f4749d
...
@@ -2861,7 +2861,7 @@ save__cat_1.name
...
@@ -2861,7 +2861,7 @@ save__cat_1.name
std
::
istream
is_dict
(
&
buffer
);
std
::
istream
is_dict
(
&
buffer
);
auto
validator
=
cif
::
parse_dictionary
(
"test_dict.dic"
,
is_dict
);
auto
&
validator
=
cif
::
validator_factory
::
instance
().
construct_validator
(
"test_dict.dic"
,
is_dict
);
cif
::
file
f
;
cif
::
file
f
;
f
.
set_validator
(
&
validator
);
f
.
set_validator
(
&
validator
);
...
@@ -2899,8 +2899,6 @@ _cat_1.name
...
@@ -2899,8 +2899,6 @@ _cat_1.name
ss
<<
f
;
ss
<<
f
;
cif
::
file
f2
(
ss
);
cif
::
file
f2
(
ss
);
f2
.
set_validator
(
&
validator
);
BOOST_ASSERT
(
f2
.
is_valid
());
BOOST_ASSERT
(
f2
.
is_valid
());
auto
&
audit_conform
=
f2
.
front
()[
"audit_conform"
];
auto
&
audit_conform
=
f2
.
front
()[
"audit_conform"
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment