Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
libcifpp
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
libcifpp
Commits
ae9d247d
Unverified
Commit
ae9d247d
authored
Jun 05, 2023
by
Maarten L. Hekkelman
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimised the parser a bit
parent
16b7deaf
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
150 additions
and
86 deletions
+150
-86
include/cif++/parser.hpp
+24
-14
src/dictionary_parser.cpp
+1
-1
src/parser.cpp
+125
-71
No files found.
include/cif++/parser.hpp
View file @
ae9d247d
...
...
@@ -54,8 +54,6 @@ class sac_parser
public
:
using
datablock_index
=
std
::
map
<
std
::
string
,
std
::
size_t
>
;
sac_parser
(
std
::
istream
&
is
,
bool
init
=
true
);
virtual
~
sac_parser
()
=
default
;
enum
CharTraitsMask
:
uint8_t
...
...
@@ -186,6 +184,9 @@ class sac_parser
void
parse_file
();
protected
:
sac_parser
(
std
::
istream
&
is
,
bool
init
=
true
);
void
parse_global
();
void
parse_datablock
();
...
...
@@ -208,12 +209,13 @@ class sac_parser
// production methods, these are pure virtual here
virtual
void
produce_datablock
(
const
std
::
string
&
name
)
=
0
;
virtual
void
produce_category
(
const
std
::
string
&
name
)
=
0
;
virtual
void
produce_datablock
(
std
::
string_view
name
)
=
0
;
virtual
void
produce_category
(
std
::
string_view
name
)
=
0
;
virtual
void
produce_row
()
=
0
;
virtual
void
produce_item
(
const
std
::
string
&
category
,
const
std
::
string
&
item
,
const
std
::
string
&
value
)
=
0
;
virtual
void
produce_item
(
std
::
string_view
category
,
std
::
string_view
item
,
std
::
string_view
value
)
=
0
;
protected
:
enum
State
{
Start
,
...
...
@@ -229,9 +231,8 @@ class sac_parser
TextField
,
Float
=
100
,
Int
=
110
,
Value
=
300
,
DATA
,
SAVE
Reserved
=
300
,
Value
};
std
::
streambuf
&
m_source
;
...
...
@@ -241,9 +242,18 @@ class sac_parser
uint32_t
m_line_nr
;
bool
m_bol
;
CIFToken
m_lookahead
;
std
::
string
m_token_value
;
CIFValue
mTokenType
;
std
::
vector
<
int
>
m_buffer
;
// retract buffer, used to be a stack<char>
// std::string m_token_value;
// CIFValue mTokenType;
// std::vector<int> m_buffer; // retract buffer, used to be a stack<char>
static
constexpr
size_t
kBufferSize
=
128
;
int
m_buffer
[
kBufferSize
];
int
*
m_buffer_ptr
=
m_buffer
;
// token buffer
std
::
vector
<
char
>
m_token_buffer
;
std
::
string_view
m_token_value
;
};
// --------------------------------------------------------------------
...
...
@@ -257,13 +267,13 @@ class parser : public sac_parser
{
}
void
produce_datablock
(
const
std
::
string
&
name
)
override
;
void
produce_datablock
(
std
::
string_view
name
)
override
;
void
produce_category
(
const
std
::
string
&
name
)
override
;
void
produce_category
(
std
::
string_view
name
)
override
;
void
produce_row
()
override
;
void
produce_item
(
const
std
::
string
&
category
,
const
std
::
string
&
item
,
const
std
::
string
&
value
)
override
;
void
produce_item
(
std
::
string_view
category
,
std
::
string_view
item
,
std
::
string_view
value
)
override
;
protected
:
file
&
m_file
;
...
...
src/dictionary_parser.cpp
View file @
ae9d247d
...
...
@@ -117,7 +117,7 @@ class dictionary_parser : public parser
if
(
not
m_collected_item_types
)
m_collected_item_types
=
collect_item_types
();
std
::
string
saveFrameName
=
m_token_value
;
std
::
string
saveFrameName
{
m_token_value
}
;
if
(
saveFrameName
.
empty
())
error
(
"Invalid save frame, should contain more than just 'save_' here"
);
...
...
src/parser.cpp
View file @
ae9d247d
...
...
@@ -185,27 +185,24 @@ int sac_parser::get_next_char()
{
int
result
=
std
::
char_traits
<
char
>::
eof
();
if
(
m_buffer
.
empty
()
)
if
(
m_buffer
_ptr
==
m_buffer
)
result
=
m_source
.
sbumpc
();
else
{
result
=
m_buffer
.
back
();
m_buffer
.
pop_back
();
}
result
=
*--
m_buffer_ptr
;
// very simple CR/LF translation into LF
if
(
result
==
'\r'
)
{
int
lookahead
=
m_source
.
sbumpc
();
if
(
lookahead
!=
'\n'
)
m_buffer
.
push_back
(
lookahead
)
;
*
m_buffer_ptr
++
=
lookahead
;
result
=
'\n'
;
}
if
(
result
==
std
::
char_traits
<
char
>::
eof
())
m_token_
value
.
push_back
(
0
);
m_token_
buffer
.
push_back
(
0
);
else
m_token_
value
.
push_back
(
std
::
char_traits
<
char
>::
to_char_type
(
result
));
m_token_
buffer
.
push_back
(
std
::
char_traits
<
char
>::
to_char_type
(
result
));
if
(
result
==
'\n'
)
++
m_line_nr
;
...
...
@@ -224,21 +221,24 @@ int sac_parser::get_next_char()
void
sac_parser
::
retract
()
{
assert
(
not
m_token_
value
.
empty
());
assert
(
not
m_token_
buffer
.
empty
());
char
ch
=
m_token_
value
.
back
();
char
ch
=
m_token_
buffer
.
back
();
if
(
ch
==
'\n'
)
--
m_line_nr
;
m_buffer
.
push_back
(
ch
==
0
?
std
::
char_traits
<
char
>::
eof
()
:
std
::
char_traits
<
char
>::
to_int_type
(
ch
));
m_token_value
.
pop_back
();
if
(
m_buffer_ptr
==
m_buffer
+
kBufferSize
)
throw
cif
::
parse_error
(
m_line_nr
,
"Buffer overflow"
);
*
m_buffer_ptr
++
=
ch
==
0
?
std
::
char_traits
<
char
>::
eof
()
:
std
::
char_traits
<
char
>::
to_int_type
(
ch
);
m_token_buffer
.
pop_back
();
}
int
sac_parser
::
restart
(
int
start
)
{
int
result
=
0
;
while
(
not
m_token_
value
.
empty
())
while
(
not
m_token_
buffer
.
empty
())
retract
();
switch
(
start
)
...
...
@@ -252,6 +252,10 @@ int sac_parser::restart(int start)
break
;
case
State
:
:
Int
:
result
=
State
::
Reserved
;
break
;
case
State
:
:
Reserved
:
result
=
State
::
Value
;
break
;
...
...
@@ -273,8 +277,9 @@ sac_parser::CIFToken sac_parser::get_next_token()
int
state
=
State
::
Start
,
start
=
State
::
Start
;
m_bol
=
false
;
m_token_value
.
clear
();
mTokenType
=
CIFValue
::
Unknown
;
m_token_buffer
.
clear
();
// mTokenType = CIFValue::Unknown;
m_token_value
=
{};
while
(
result
==
CIFToken
::
Unknown
)
{
...
...
@@ -298,6 +303,8 @@ sac_parser::CIFToken sac_parser::get_next_token()
state
=
State
::
Tag
;
else
if
(
ch
==
';'
and
m_bol
)
state
=
State
::
TextField
;
else
if
(
ch
==
'?'
)
state
=
State
::
QuestionMark
;
else
if
(
ch
==
'\''
or
ch
==
'"'
)
{
quoteChar
=
ch
;
...
...
@@ -314,7 +321,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
state
=
State
::
Start
;
retract
();
m_token_
value
.
clear
();
m_token_
buffer
.
clear
();
}
else
m_bol
=
(
ch
==
'\n'
);
...
...
@@ -325,13 +332,24 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
state
=
State
::
Start
;
m_bol
=
true
;
m_token_
value
.
clear
();
m_token_
buffer
.
clear
();
}
else
if
(
ch
==
kEOF
)
result
=
CIFToken
::
Eof
;
else
if
(
not
is_any_print
(
ch
))
error
(
"invalid character in comment"
);
break
;
case
State
:
:
QuestionMark
:
if
(
not
is_non_blank
(
ch
))
{
retract
();
result
=
CIFToken
::
Value
;
// m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.data() + 1);
}
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
TextField
:
if
(
ch
==
'\n'
)
...
...
@@ -354,9 +372,9 @@ sac_parser::CIFToken sac_parser::get_next_token()
state
=
State
::
TextField
;
else
if
(
ch
==
';'
)
{
assert
(
m_token_
value
.
length
()
>=
2
);
m_token_value
=
m_token_value
.
substr
(
1
,
m_token_value
.
length
()
-
3
);
mTokenType
=
CIFValue
::
TextField
;
assert
(
m_token_
buffer
.
size
()
>=
2
);
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
()
+
1
,
m_token_buffer
.
data
()
+
m_token_buffer
.
size
()
-
2
);
//
mTokenType = CIFValue::TextField;
result
=
CIFToken
::
Value
;
}
else
if
(
ch
==
kEOF
)
...
...
@@ -379,12 +397,12 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
retract
();
result
=
CIFToken
::
Value
;
mTokenType
=
CIFValue
::
String
;
//
mTokenType = CIFValue::String;
if
(
m_token_
value
.
length
()
<
2
)
if
(
m_token_
buffer
.
size
()
<
2
)
error
(
"Invalid quoted string token"
);
m_token_value
=
m_token_value
.
substr
(
1
,
m_token_value
.
length
()
-
2
);
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
()
+
1
,
m_token_buffer
.
data
()
+
m_token_buffer
.
size
()
-
1
);
}
else
if
(
ch
==
quoteChar
)
;
...
...
@@ -401,6 +419,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
retract
();
result
=
CIFToken
::
Tag
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
data
()
+
m_token_buffer
.
size
());
}
break
;
...
...
@@ -427,7 +446,8 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
retract
();
result
=
CIFToken
::
Value
;
mTokenType
=
CIFValue
::
Int
;
// mTokenType = CIFValue::Int;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
data
()
+
m_token_buffer
.
size
());
}
else
state
=
start
=
restart
(
start
);
...
...
@@ -441,7 +461,8 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
retract
();
result
=
CIFToken
::
Value
;
mTokenType
=
CIFValue
::
Float
;
// mTokenType = CIFValue::Float;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
data
()
+
m_token_buffer
.
size
());
}
else
state
=
start
=
restart
(
start
);
...
...
@@ -469,7 +490,8 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
retract
();
result
=
CIFToken
::
Value
;
mTokenType
=
CIFValue
::
Float
;
// mTokenType = CIFValue::Float;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
data
()
+
m_token_buffer
.
size
());
}
else
state
=
start
=
restart
(
start
);
...
...
@@ -487,63 +509,95 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
retract
();
result
=
CIFToken
::
Value
;
mTokenType
=
CIFValue
::
Int
;
// mTokenType = CIFValue::Int;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
data
()
+
m_token_buffer
.
size
());
}
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Value
:
if
(
ch
==
'_'
)
case
State
:
:
Reserved
:
switch
(
ch
&
~
0x20
)
{
std
::
string
s
=
to_lower_copy
(
m_token_value
);
if
(
s
==
"data_"
)
{
state
=
State
::
DATA
;
continue
;
}
if
(
s
==
"save_"
)
{
state
=
State
::
SAVE
;
continue
;
}
case
'D'
:
// data_
state
=
State
::
Reserved
+
10
;
break
;
case
'G'
:
state
=
State
::
Reserved
+
20
;
// global_
break
;
case
'L'
:
state
=
State
::
Reserved
+
30
;
// loop_
break
;
case
'S'
:
state
=
State
::
Reserved
+
40
;
// stop_ | save_
break
;
default
:
state
=
start
=
restart
(
start
);
break
;
}
if
(
result
==
CIFToken
::
Unknown
and
not
is_non_blank
(
ch
))
break
;
case
State
:
:
Reserved
+
10
:
if
((
ch
&
~
0x20
)
==
'A'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
11
:
if
((
ch
&
~
0x20
)
==
'T'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
12
:
if
((
ch
&
~
0x20
)
==
'A'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
13
:
if
((
ch
&
~
0x20
)
==
'_'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
14
:
if
(
is_non_blank
(
ch
))
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
15
:
if
(
not
is_non_blank
(
ch
))
{
retract
();
result
=
CIFToken
::
Value
;
if
(
m_token_value
==
"."
)
mTokenType
=
CIFValue
::
Inapplicable
;
else
if
(
iequals
(
m_token_value
,
"global_"
))
result
=
CIFToken
::
GLOBAL
;
else
if
(
iequals
(
m_token_value
,
"stop_"
))
result
=
CIFToken
::
STOP
;
else
if
(
iequals
(
m_token_value
,
"loop_"
))
result
=
CIFToken
::
LOOP
;
else
if
(
m_token_value
==
"?"
)
{
mTokenType
=
CIFValue
::
Unknown
;
m_token_value
.
clear
();
}
result
=
CIFToken
::
DATA
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
()
+
5
,
m_token_buffer
.
data
()
+
m_token_buffer
.
size
());
}
break
;
case
State
:
:
DATA
:
case
State
:
:
SAVE
:
case
State
:
:
Reserved
+
20
:
if
((
ch
&
~
0x20
)
==
'L'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
21
:
if
((
ch
&
~
0x20
)
==
'O'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
22
:
if
((
ch
&
~
0x20
)
==
'B'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
23
:
if
((
ch
&
~
0x20
)
==
'A'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
24
:
if
((
ch
&
~
0x20
)
==
'L'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
25
:
if
((
ch
&
~
0x20
)
==
'_'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
26
:
if
(
not
is_non_blank
(
ch
))
result
=
CIFToken
::
GLOBAL
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
30
:
if
((
ch
&
~
0x20
)
==
'O'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
31
:
if
((
ch
&
~
0x20
)
==
'O'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
32
:
if
((
ch
&
~
0x20
)
==
'P'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
33
:
if
((
ch
&
~
0x20
)
==
'_'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
34
:
if
(
not
is_non_blank
(
ch
))
result
=
CIFToken
::
LOOP
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
40
:
if
((
ch
&
~
0x20
)
==
'A'
)
state
=
State
::
Reserved
+
41
;
else
if
((
ch
&
~
0x20
)
==
'T'
)
state
=
State
::
Reserved
+
51
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
41
:
if
((
ch
&
~
0x20
)
==
'V'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
42
:
if
((
ch
&
~
0x20
)
==
'E'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
43
:
if
(
is_non_blank
(
ch
))
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
44
:
if
(
not
is_non_blank
(
ch
))
{
retract
();
result
=
CIFToken
::
SAVE
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
()
+
5
,
m_token_buffer
.
data
()
+
m_token_buffer
.
size
());
}
break
;
if
(
state
==
State
::
DATA
)
result
=
CIFToken
::
DATA
;
else
result
=
CIFToken
::
SAVE
;
case
State
:
:
Reserved
+
51
:
if
((
ch
&
~
0x20
)
==
'O'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
52
:
if
((
ch
&
~
0x20
)
==
'P'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
53
:
if
((
ch
&
~
0x20
)
==
'_'
)
++
state
;
else
state
=
start
=
restart
(
start
);
break
;
case
State
:
:
Reserved
+
54
:
if
(
not
is_non_blank
(
ch
))
result
=
CIFToken
::
STOP
;
else
state
=
start
=
restart
(
start
);
break
;
m_token_value
.
erase
(
m_token_value
.
begin
(),
m_token_value
.
begin
()
+
5
);
case
State
:
:
Value
:
if
(
not
is_non_blank
(
ch
))
{
retract
();
result
=
CIFToken
::
Value
;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
data
()
+
m_token_buffer
.
size
());
break
;
}
break
;
...
...
@@ -557,8 +611,8 @@ sac_parser::CIFToken sac_parser::get_next_token()
if
(
VERBOSE
>=
5
)
{
std
::
cerr
<<
get_token_name
(
result
);
if
(
mTokenType
!=
CIFValue
::
Unknown
)
std
::
cerr
<<
' '
<<
get_value_name
(
mTokenType
);
//
if (mTokenType != CIFValue::Unknown)
//
std::cerr << ' ' << get_value_name(mTokenType);
if
(
result
!=
CIFToken
::
Eof
)
std
::
cerr
<<
" "
<<
std
::
quoted
(
m_token_value
);
std
::
cerr
<<
std
::
endl
;
...
...
@@ -903,7 +957,7 @@ void sac_parser::parse_save_frame()
// --------------------------------------------------------------------
void
parser
::
produce_datablock
(
const
std
::
string
&
name
)
void
parser
::
produce_datablock
(
std
::
string_view
name
)
{
if
(
VERBOSE
>=
4
)
std
::
cerr
<<
"producing data_"
<<
name
<<
std
::
endl
;
...
...
@@ -912,7 +966,7 @@ void parser::produce_datablock(const std::string &name)
m_datablock
=
&
(
*
iter
);
}
void
parser
::
produce_category
(
const
std
::
string
&
name
)
void
parser
::
produce_category
(
std
::
string_view
name
)
{
if
(
VERBOSE
>=
4
)
std
::
cerr
<<
"producing category "
<<
name
<<
std
::
endl
;
...
...
@@ -934,7 +988,7 @@ void parser::produce_row()
// m_row.lineNr(m_line_nr);
}
void
parser
::
produce_item
(
const
std
::
string
&
category
,
const
std
::
string
&
item
,
const
std
::
string
&
value
)
void
parser
::
produce_item
(
std
::
string_view
category
,
std
::
string_view
item
,
std
::
string_view
value
)
{
if
(
VERBOSE
>=
4
)
std
::
cerr
<<
"producing _"
<<
category
<<
'.'
<<
item
<<
" -> "
<<
value
<<
std
::
endl
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment