Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
libcifpp
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
libcifpp
Commits
32f4749d
Unverified
Commit
32f4749d
authored
Jun 07, 2023
by
Maarten L. Hekkelman
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
faster cif parser
parent
da12be87
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
101 additions
and
171 deletions
+101
-171
CMakeLists.txt
+1
-1
changelog
+4
-0
include/cif++/parser.hpp
+13
-16
include/cif++/validate.hpp
+2
-1
src/parser.cpp
+38
-148
src/validate.cpp
+2
-2
test/io-test.cpp
+40
-0
test/unit-v2-test.cpp
+1
-3
No files found.
CMakeLists.txt
View file @
32f4749d
...
...
@@ -25,7 +25,7 @@
cmake_minimum_required
(
VERSION 3.16
)
# set the project name
project
(
cifpp VERSION 5.0.
9
LANGUAGES CXX
)
project
(
cifpp VERSION 5.0.
10
LANGUAGES CXX
)
list
(
PREPEND CMAKE_MODULE_PATH
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/cmake"
)
...
...
changelog
View file @
32f4749d
Version
5.0.10
-
Fix
in
progress_bar
,
was
using
too
much
CPU
-
Optimised
mmCIF
parser
Version
5.0.9
-
Fix
in
dihedral
angle
calculations
-
Added
create_water
to
model
...
...
include/cif++/parser.hpp
View file @
32f4749d
...
...
@@ -63,9 +63,14 @@ class sac_parser
kAnyPrintMask
=
1
<<
3
};
static
bool
is_whit
e
(
int
ch
)
static
constexpr
bool
is_spac
e
(
int
ch
)
{
return
std
::
isspace
(
ch
)
or
ch
==
'#'
;
return
ch
==
' '
or
ch
==
'\t'
or
ch
==
'\r'
or
ch
==
'\n'
;
}
static
constexpr
bool
is_white
(
int
ch
)
{
return
is_space
(
ch
)
or
ch
==
'#'
;
}
static
constexpr
bool
is_ordinary
(
int
ch
)
...
...
@@ -136,15 +141,13 @@ class sac_parser
}
}
// get_next_char takes a char from the buffer, or if it is empty
// from the istream. This function also does carriage/linefeed
// translation.
// get_next_char takes the next character from the istream.
// This function also does carriage/linefeed translation.
int
get_next_char
();
// Put the last read character back into the istream
void
retract
();
int
restart
(
int
start
);
CIFToken
get_next_token
();
void
match
(
CIFToken
token
);
...
...
@@ -191,7 +194,7 @@ class sac_parser
protected
:
enum
State
enum
class
State
{
Start
,
White
,
...
...
@@ -204,9 +207,8 @@ class sac_parser
UnquotedString
,
Tag
,
TextField
,
Float
=
100
,
Int
=
110
,
Reserved
=
300
,
TextFieldNL
,
Reserved
,
Value
};
...
...
@@ -217,11 +219,6 @@ class sac_parser
bool
m_bol
;
CIFToken
m_lookahead
;
static
constexpr
size_t
kRetractBufferSize
=
128
;
int
m_retract_buffer
[
kRetractBufferSize
];
int
*
m_retract_buffer_ptr
=
m_retract_buffer
;
// token buffer
std
::
vector
<
char
>
m_token_buffer
;
std
::
string_view
m_token_value
;
...
...
include/cif++/validate.hpp
View file @
32f4749d
...
...
@@ -228,8 +228,9 @@ class validator_factory
const
validator
&
operator
[](
std
::
string_view
dictionary_name
);
const
validator
&
construct_validator
(
std
::
string_view
name
,
std
::
istream
&
is
);
private
:
void
construct_validator
(
std
::
string_view
name
,
std
::
istream
&
is
);
// --------------------------------------------------------------------
...
...
src/parser.cpp
View file @
32f4749d
...
...
@@ -222,29 +222,25 @@ bool sac_parser::is_unquoted_string(std::string_view text)
// translation.
int
sac_parser
::
get_next_char
()
{
int
result
;
if
(
m_retract_buffer_ptr
==
m_retract_buffer
)
result
=
m_source
.
sbumpc
();
else
result
=
*--
m_retract_buffer_ptr
;
// very simple CR/LF translation into LF
if
(
result
==
'\r'
)
{
int
lookahead
=
m_source
.
sbumpc
();
if
(
lookahead
!=
'\n'
)
*
m_retract_buffer_ptr
++
=
lookahead
;
result
=
'\n'
;
}
int
result
=
m_source
.
sbumpc
();
if
(
result
==
std
::
char_traits
<
char
>::
eof
())
m_token_buffer
.
push_back
(
0
);
else
m_token_buffer
.
push_back
(
std
::
char_traits
<
char
>::
to_char_type
(
result
));
{
if
(
result
==
'\r'
)
{
if
(
m_source
.
sgetc
()
==
'\n'
)
m_source
.
sbumpc
();
if
(
result
==
'\n'
)
++
m_line_nr
;
++
m_line_nr
;
result
=
'\n'
;
}
else
if
(
result
==
'\n'
)
++
m_line_nr
;
m_token_buffer
.
push_back
(
std
::
char_traits
<
char
>::
to_char_type
(
result
));
}
return
result
;
}
...
...
@@ -257,45 +253,16 @@ void sac_parser::retract()
if
(
ch
==
'\n'
)
--
m_line_nr
;
if
(
m_retract_buffer_ptr
==
m_retract_buffer
+
kRetractBufferSize
)
throw
cif
::
parse_error
(
m_line_nr
,
"Buffer overflow"
);
*
m_retract_buffer_ptr
++
=
ch
==
0
?
std
::
char_traits
<
char
>::
eof
()
:
std
::
char_traits
<
char
>::
to_int_type
(
ch
);
m_token_buffer
.
pop_back
();
}
int
sac_parser
::
restart
(
int
start
)
{
int
result
=
0
;
while
(
not
m_token_buffer
.
empty
())
retract
();
switch
(
start
)
if
(
ch
!=
0
)
{
case
State
:
:
Start
:
result
=
State
::
Float
;
break
;
case
State
:
:
Float
:
result
=
State
::
Int
;
break
;
// since we always putback at most a single character,
// the test below should never fail.
case
State
:
:
Int
:
result
=
State
::
Value
;
break
;
case
State
:
:
Reserved
:
result
=
State
::
Value
;
break
;
default
:
error
(
"Invalid state in SacParser"
);
if
(
m_source
.
sputbackc
(
ch
)
==
std
::
char_traits
<
char
>::
eof
())
throw
std
::
runtime_error
(
"putback failure"
);
}
m_bol
=
false
;
return
result
;
m_token_buffer
.
pop_back
();
}
sac_parser
::
CIFToken
sac_parser
::
get_next_token
()
...
...
@@ -304,7 +271,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
CIFToken
result
=
CIFToken
::
Unknown
;
int
quoteChar
=
0
;
int
state
=
State
::
Start
,
start
=
State
::
Start
;
State
state
=
State
::
Start
;
m_bol
=
false
;
m_token_buffer
.
clear
();
...
...
@@ -344,13 +311,13 @@ sac_parser::CIFToken sac_parser::get_next_token()
else
if
(
dag
.
move
(
ch
)
==
reserved_words_automaton
::
undefined
)
state
=
State
::
Reserved
;
else
state
=
start
=
restart
(
start
)
;
state
=
State
::
Value
;
break
;
case
State
:
:
White
:
if
(
ch
==
kEOF
)
result
=
CIFToken
::
Eof
;
else
if
(
not
isspace
(
ch
))
else
if
(
not
is
_
space
(
ch
))
{
state
=
State
::
Start
;
retract
();
...
...
@@ -380,19 +347,19 @@ sac_parser::CIFToken sac_parser::get_next_token()
result
=
CIFToken
::
Value
;
}
else
state
=
start
=
restart
(
start
)
;
state
=
State
::
Value
;
break
;
case
State
:
:
TextField
:
if
(
ch
==
'\n'
)
state
=
State
::
TextField
+
1
;
state
=
State
::
TextField
NL
;
else
if
(
ch
==
kEOF
)
error
(
"unterminated textfield"
);
else
if
(
not
is_any_print
(
ch
)
and
cif
::
VERBOSE
>
2
)
warning
(
"invalid character in text field '"
+
std
::
string
({
static_cast
<
char
>
(
ch
)})
+
"' ("
+
std
::
to_string
((
int
)
ch
)
+
")"
);
break
;
case
State
:
:
TextField
+
1
:
case
State
:
:
TextField
NL
:
if
(
is_text_lead
(
ch
)
or
ch
==
' '
or
ch
==
'\t'
)
state
=
State
::
TextField
;
else
if
(
ch
==
';'
)
...
...
@@ -445,90 +412,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
}
break
;
case
State
:
:
Float
:
if
(
ch
==
'+'
or
ch
==
'-'
)
state
=
State
::
Float
+
1
;
else
if
((
ch
>=
'0'
and
ch
<=
'9'
))
state
=
State
::
Float
+
1
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Float
+
1
:
if
(
ch
==
'.'
)
state
=
State
::
Float
+
2
;
else
if
((
ch
&
~
0x20
)
==
'E'
)
state
=
State
::
Float
+
3
;
else
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
{
retract
();
result
=
CIFToken
::
Value
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
else
state
=
start
=
restart
(
start
);
break
;
// parsed '.'
case
State
:
:
Float
+
2
:
if
((
ch
&
~
0x20
)
==
'E'
)
state
=
State
::
Float
+
3
;
else
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
{
retract
();
result
=
CIFToken
::
Value
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
else
state
=
start
=
restart
(
start
);
break
;
// parsed 'e'
case
State
:
:
Float
+
3
:
if
(
ch
==
'-'
or
ch
==
'+'
)
state
=
State
::
Float
+
4
;
else
if
((
ch
>=
'0'
and
ch
<=
'9'
))
state
=
State
::
Float
+
5
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Float
+
4
:
if
((
ch
>=
'0'
and
ch
<=
'9'
))
state
=
State
::
Float
+
5
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Float
+
5
:
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
{
retract
();
result
=
CIFToken
::
Value
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Int
:
if
((
ch
>=
'0'
and
ch
<=
'9'
)
or
ch
==
'+'
or
ch
==
'-'
)
state
=
State
::
Int
+
1
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Int
+
1
:
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
{
retract
();
result
=
CIFToken
::
Value
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
:
switch
(
dag
.
move
(
ch
))
{
...
...
@@ -536,7 +419,14 @@ sac_parser::CIFToken sac_parser::get_next_token()
break
;
case
reserved_words_automaton
:
:
no_keyword
:
state
=
start
=
restart
(
start
);
if
(
not
is_non_blank
(
ch
))
{
retract
();
result
=
CIFToken
::
Value
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
else
state
=
State
::
Value
;
break
;
case
reserved_words_automaton
:
:
data
:
...
...
@@ -664,7 +554,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
break
;
case
string_quote
:
if
(
std
::
is
space
(
ch
))
if
(
is_
space
(
ch
))
state
=
start
;
else
state
=
string
;
...
...
@@ -676,7 +566,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
break
;
case
data
:
if
(
isspace
(
ch
)
and
dblk
[
si
]
==
0
)
if
(
is
_
space
(
ch
)
and
dblk
[
si
]
==
0
)
found
=
true
;
else
if
(
dblk
[
si
++
]
!=
ch
)
state
=
start
;
...
...
@@ -754,7 +644,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
break
;
case
string_quote
:
if
(
std
::
is
space
(
ch
))
if
(
is_
space
(
ch
))
state
=
start
;
else
state
=
string
;
...
...
@@ -778,7 +668,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
case
data_name
:
if
(
is_non_blank
(
ch
))
datablock
.
insert
(
datablock
.
end
(),
char
(
ch
));
else
if
(
isspace
(
ch
))
else
if
(
is
_
space
(
ch
))
{
if
(
not
datablock
.
empty
())
index
[
datablock
]
=
m_source
.
pubseekoff
(
0
,
std
::
ios_base
::
cur
,
std
::
ios_base
::
in
);
...
...
src/validate.cpp
View file @
32f4749d
...
...
@@ -491,9 +491,9 @@ const validator &validator_factory::operator[](std::string_view dictionary_name)
}
}
void
validator_factory
::
construct_validator
(
std
::
string_view
name
,
std
::
istream
&
is
)
const
validator
&
validator_factory
::
construct_validator
(
std
::
string_view
name
,
std
::
istream
&
is
)
{
m_validators
.
emplace_back
(
parse_dictionary
(
name
,
is
));
return
m_validators
.
emplace_back
(
parse_dictionary
(
name
,
is
));
}
}
// namespace cif
test/io-test.cpp
0 → 100644
View file @
32f4749d
#include <cstdlib>
#include <iostream>

#include <cif++.hpp>
/// A cif::sac_parser subclass whose callbacks all do nothing.
///
/// Every produce_* hook is overridden with an empty body, so whatever the
/// base class reports while reading the stream is simply discarded.
class dummy_parser : public cif::sac_parser
{
  public:
	/// Hand the input stream on to the base-class constructor.
	dummy_parser(std::istream &is)
		: cif::sac_parser(is)
	{
	}

	// The parameter names are commented out because none of the arguments
	// are used by these intentionally empty callbacks.
	void produce_datablock(std::string_view /*name*/) override {}

	void produce_category(std::string_view /*name*/) override {}

	void produce_row() override {}

	void produce_item(std::string_view /*category*/, std::string_view /*item*/,
		std::string_view /*value*/) override {}
};
int
main
()
{
cif
::
gzio
::
ifstream
in
(
"/srv/data/pdb/mmCIF/gl/8glv.cif.gz"
);
dummy_parser
parser
(
in
);
parser
.
parse_file
();
// cif::file f("/srv/data/pdb/mmCIF/gl/8glv.cif.gz");
return
0
;
}
\ No newline at end of file
test/unit-v2-test.cpp
View file @
32f4749d
...
...
@@ -2861,7 +2861,7 @@ save__cat_1.name
std
::
istream
is_dict
(
&
buffer
);
auto
validator
=
cif
::
parse_dictionary
(
"test_dict.dic"
,
is_dict
);
auto
&
validator
=
cif
::
validator_factory
::
instance
().
construct_validator
(
"test_dict.dic"
,
is_dict
);
cif
::
file
f
;
f
.
set_validator
(
&
validator
);
...
...
@@ -2899,8 +2899,6 @@ _cat_1.name
ss
<<
f
;
cif
::
file
f2
(
ss
);
f2
.
set_validator
(
&
validator
);
BOOST_ASSERT
(
f2
.
is_valid
());
auto
&
audit_conform
=
f2
.
front
()[
"audit_conform"
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment