Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
libcifpp
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
libcifpp
Commits
94a38ad4
Commit
94a38ad4
authored
Jun 06, 2023
by
Maarten L. Hekkelman
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop
parents
20ef79a1
92bf2547
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
19 additions
and
134 deletions
+19
-134
include/cif++/parser.hpp
+3
-8
src/parser.cpp
+16
-123
src/utilities.cpp
+0
-1
test/unit-v2-test.cpp
+0
-2
No files found.
include/cif++/parser.hpp
View file @
94a38ad4
...
@@ -29,7 +29,6 @@
...
@@ -29,7 +29,6 @@
#include "cif++/row.hpp"
#include "cif++/row.hpp"
#include <map>
#include <map>
#include <regex>
namespace
cif
namespace
cif
{
{
...
@@ -214,18 +213,14 @@ class sac_parser
...
@@ -214,18 +213,14 @@ class sac_parser
std
::
streambuf
&
m_source
;
std
::
streambuf
&
m_source
;
// Parser state
// Parser state
bool
m_validate
;
uint32_t
m_line_nr
;
uint32_t
m_line_nr
;
bool
m_bol
;
bool
m_bol
;
CIFToken
m_lookahead
;
CIFToken
m_lookahead
;
// std::string m_token_value;
// CIFValue mTokenType;
// std::vector<int> m_buffer; // retract buffer, used to be a stack<char>
static
constexpr
size_t
kBufferSize
=
128
;
static
constexpr
size_t
k
Retract
BufferSize
=
128
;
int
m_
buffer
[
k
BufferSize
];
int
m_
retract_buffer
[
kRetract
BufferSize
];
int
*
m_
buffer_ptr
=
m
_buffer
;
int
*
m_
retract_buffer_ptr
=
m_retract
_buffer
;
// token buffer
// token buffer
std
::
vector
<
char
>
m_token_buffer
;
std
::
vector
<
char
>
m_token_buffer
;
...
...
src/parser.cpp
View file @
94a38ad4
...
@@ -32,7 +32,6 @@
...
@@ -32,7 +32,6 @@
#include <cassert>
#include <cassert>
#include <iostream>
#include <iostream>
#include <map>
#include <map>
#include <regex>
#include <stack>
#include <stack>
namespace
cif
namespace
cif
...
@@ -186,7 +185,6 @@ sac_parser::sac_parser(std::istream &is, bool init)
...
@@ -186,7 +185,6 @@ sac_parser::sac_parser(std::istream &is, bool init)
if
(
is
.
rdbuf
()
==
nullptr
)
if
(
is
.
rdbuf
()
==
nullptr
)
throw
std
::
runtime_error
(
"Attempt to read from uninitialised stream"
);
throw
std
::
runtime_error
(
"Attempt to read from uninitialised stream"
);
m_validate
=
true
;
m_line_nr
=
1
;
m_line_nr
=
1
;
m_bol
=
true
;
m_bol
=
true
;
...
@@ -224,19 +222,19 @@ bool sac_parser::is_unquoted_string(std::string_view text)
...
@@ -224,19 +222,19 @@ bool sac_parser::is_unquoted_string(std::string_view text)
// translation.
// translation.
int
sac_parser
::
get_next_char
()
int
sac_parser
::
get_next_char
()
{
{
int
result
=
std
::
char_traits
<
char
>::
eof
()
;
int
result
;
if
(
m_
buffer_ptr
==
m
_buffer
)
if
(
m_
retract_buffer_ptr
==
m_retract
_buffer
)
result
=
m_source
.
sbumpc
();
result
=
m_source
.
sbumpc
();
else
else
result
=
*--
m_buffer_ptr
;
result
=
*--
m_
retract_
buffer_ptr
;
// very simple CR/LF translation into LF
// very simple CR/LF translation into LF
if
(
result
==
'\r'
)
if
(
result
==
'\r'
)
{
{
int
lookahead
=
m_source
.
sbumpc
();
int
lookahead
=
m_source
.
sbumpc
();
if
(
lookahead
!=
'\n'
)
if
(
lookahead
!=
'\n'
)
*
m_buffer_ptr
++
=
lookahead
;
*
m_
retract_
buffer_ptr
++
=
lookahead
;
result
=
'\n'
;
result
=
'\n'
;
}
}
...
@@ -248,15 +246,6 @@ int sac_parser::get_next_char()
...
@@ -248,15 +246,6 @@ int sac_parser::get_next_char()
if
(
result
==
'\n'
)
if
(
result
==
'\n'
)
++
m_line_nr
;
++
m_line_nr
;
if
(
VERBOSE
>=
6
)
{
std
::
cerr
<<
"get_next_char => "
;
if
(
iscntrl
(
result
)
or
not
isprint
(
result
))
std
::
cerr
<<
int
(
result
)
<<
std
::
endl
;
else
std
::
cerr
<<
char
(
result
)
<<
std
::
endl
;
}
return
result
;
return
result
;
}
}
...
@@ -268,10 +257,10 @@ void sac_parser::retract()
...
@@ -268,10 +257,10 @@ void sac_parser::retract()
if
(
ch
==
'\n'
)
if
(
ch
==
'\n'
)
--
m_line_nr
;
--
m_line_nr
;
if
(
m_
buffer_ptr
==
m_buffer
+
k
BufferSize
)
if
(
m_
retract_buffer_ptr
==
m_retract_buffer
+
kRetract
BufferSize
)
throw
cif
::
parse_error
(
m_line_nr
,
"Buffer overflow"
);
throw
cif
::
parse_error
(
m_line_nr
,
"Buffer overflow"
);
*
m_buffer_ptr
++
=
ch
==
0
?
std
::
char_traits
<
char
>::
eof
()
:
std
::
char_traits
<
char
>::
to_int_type
(
ch
);
*
m_
retract_
buffer_ptr
++
=
ch
==
0
?
std
::
char_traits
<
char
>::
eof
()
:
std
::
char_traits
<
char
>::
to_int_type
(
ch
);
m_token_buffer
.
pop_back
();
m_token_buffer
.
pop_back
();
}
}
...
@@ -293,7 +282,7 @@ int sac_parser::restart(int start)
...
@@ -293,7 +282,7 @@ int sac_parser::restart(int start)
break
;
break
;
case
State
:
:
Int
:
case
State
:
:
Int
:
result
=
State
::
Reserved
;
result
=
State
::
Value
;
break
;
break
;
case
State
:
:
Reserved
:
case
State
:
:
Reserved
:
...
@@ -319,7 +308,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -319,7 +308,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
m_bol
=
false
;
m_bol
=
false
;
m_token_buffer
.
clear
();
m_token_buffer
.
clear
();
// mTokenType = CIFValue::Unknown;
m_token_value
=
{};
m_token_value
=
{};
reserved_words_automaton
dag
;
reserved_words_automaton
dag
;
...
@@ -353,6 +341,8 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -353,6 +341,8 @@ sac_parser::CIFToken sac_parser::get_next_token()
quoteChar
=
ch
;
quoteChar
=
ch
;
state
=
State
::
QuotedString
;
state
=
State
::
QuotedString
;
}
}
else
if
(
dag
.
move
(
ch
)
==
reserved_words_automaton
::
undefined
)
state
=
State
::
Reserved
;
else
else
state
=
start
=
restart
(
start
);
state
=
start
=
restart
(
start
);
break
;
break
;
...
@@ -388,7 +378,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -388,7 +378,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
{
retract
();
retract
();
result
=
CIFToken
::
Value
;
result
=
CIFToken
::
Value
;
// m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.data() + 1);
}
}
else
else
state
=
start
=
restart
(
start
);
state
=
start
=
restart
(
start
);
...
@@ -399,17 +388,10 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -399,17 +388,10 @@ sac_parser::CIFToken sac_parser::get_next_token()
state
=
State
::
TextField
+
1
;
state
=
State
::
TextField
+
1
;
else
if
(
ch
==
kEOF
)
else
if
(
ch
==
kEOF
)
error
(
"unterminated textfield"
);
error
(
"unterminated textfield"
);
// else if (ch == '\\')
// state = State::Esc;
else
if
(
not
is_any_print
(
ch
)
and
cif
::
VERBOSE
>
2
)
else
if
(
not
is_any_print
(
ch
)
and
cif
::
VERBOSE
>
2
)
warning
(
"invalid character in text field '"
+
std
::
string
({
static_cast
<
char
>
(
ch
)})
+
"' ("
+
std
::
to_string
((
int
)
ch
)
+
")"
);
warning
(
"invalid character in text field '"
+
std
::
string
({
static_cast
<
char
>
(
ch
)})
+
"' ("
+
std
::
to_string
((
int
)
ch
)
+
")"
);
break
;
break
;
// case State::Esc:
// if (ch == '\n')
// break;
case
State
:
:
TextField
+
1
:
case
State
:
:
TextField
+
1
:
if
(
is_text_lead
(
ch
)
or
ch
==
' '
or
ch
==
'\t'
)
if
(
is_text_lead
(
ch
)
or
ch
==
' '
or
ch
==
'\t'
)
state
=
State
::
TextField
;
state
=
State
::
TextField
;
...
@@ -417,7 +399,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -417,7 +399,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
{
assert
(
m_token_buffer
.
size
()
>=
2
);
assert
(
m_token_buffer
.
size
()
>=
2
);
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
()
+
1
,
m_token_buffer
.
size
()
-
3
);
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
()
+
1
,
m_token_buffer
.
size
()
-
3
);
// mTokenType = CIFValue::TextField;
result
=
CIFToken
::
Value
;
result
=
CIFToken
::
Value
;
}
}
else
if
(
ch
==
kEOF
)
else
if
(
ch
==
kEOF
)
...
@@ -440,8 +421,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -440,8 +421,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
{
retract
();
retract
();
result
=
CIFToken
::
Value
;
result
=
CIFToken
::
Value
;
// mTokenType = CIFValue::String;
if
(
m_token_buffer
.
size
()
<
2
)
if
(
m_token_buffer
.
size
()
<
2
)
error
(
"Invalid quoted string token"
);
error
(
"Invalid quoted string token"
);
...
@@ -468,28 +447,22 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -468,28 +447,22 @@ sac_parser::CIFToken sac_parser::get_next_token()
case
State
:
:
Float
:
case
State
:
:
Float
:
if
(
ch
==
'+'
or
ch
==
'-'
)
if
(
ch
==
'+'
or
ch
==
'-'
)
{
state
=
State
::
Float
+
1
;
state
=
State
::
Float
+
1
;
}
else
if
((
ch
>=
'0'
and
ch
<=
'9'
))
else
if
(
isdigit
(
ch
))
state
=
State
::
Float
+
1
;
state
=
State
::
Float
+
1
;
else
else
state
=
start
=
restart
(
start
);
state
=
start
=
restart
(
start
);
break
;
break
;
case
State
:
:
Float
+
1
:
case
State
:
:
Float
+
1
:
// if (ch == '(') // numeric???
// mState = State::NumericSuffix;
// else
if
(
ch
==
'.'
)
if
(
ch
==
'.'
)
state
=
State
::
Float
+
2
;
state
=
State
::
Float
+
2
;
else
if
(
tolower
(
ch
)
==
'e
'
)
else
if
(
(
ch
&
~
0x20
)
==
'E
'
)
state
=
State
::
Float
+
3
;
state
=
State
::
Float
+
3
;
else
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
else
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
{
{
retract
();
retract
();
result
=
CIFToken
::
Value
;
result
=
CIFToken
::
Value
;
// mTokenType = CIFValue::Int;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
}
else
else
...
@@ -498,13 +471,12 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -498,13 +471,12 @@ sac_parser::CIFToken sac_parser::get_next_token()
// parsed '.'
// parsed '.'
case
State
:
:
Float
+
2
:
case
State
:
:
Float
+
2
:
if
(
tolower
(
ch
)
==
'e
'
)
if
(
(
ch
&
~
0x20
)
==
'E
'
)
state
=
State
::
Float
+
3
;
state
=
State
::
Float
+
3
;
else
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
else
if
(
is_white
(
ch
)
or
ch
==
kEOF
)
{
{
retract
();
retract
();
result
=
CIFToken
::
Value
;
result
=
CIFToken
::
Value
;
// mTokenType = CIFValue::Float;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
}
else
else
...
@@ -515,14 +487,14 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -515,14 +487,14 @@ sac_parser::CIFToken sac_parser::get_next_token()
case
State
:
:
Float
+
3
:
case
State
:
:
Float
+
3
:
if
(
ch
==
'-'
or
ch
==
'+'
)
if
(
ch
==
'-'
or
ch
==
'+'
)
state
=
State
::
Float
+
4
;
state
=
State
::
Float
+
4
;
else
if
(
isdigit
(
ch
))
else
if
(
(
ch
>=
'0'
and
ch
<=
'9'
))
state
=
State
::
Float
+
5
;
state
=
State
::
Float
+
5
;
else
else
state
=
start
=
restart
(
start
);
state
=
start
=
restart
(
start
);
break
;
break
;
case
State
:
:
Float
+
4
:
case
State
:
:
Float
+
4
:
if
(
isdigit
(
ch
))
if
(
(
ch
>=
'0'
and
ch
<=
'9'
))
state
=
State
::
Float
+
5
;
state
=
State
::
Float
+
5
;
else
else
state
=
start
=
restart
(
start
);
state
=
start
=
restart
(
start
);
...
@@ -533,7 +505,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -533,7 +505,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
{
retract
();
retract
();
result
=
CIFToken
::
Value
;
result
=
CIFToken
::
Value
;
// mTokenType = CIFValue::Float;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
}
else
else
...
@@ -541,7 +512,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -541,7 +512,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
break
;
break
;
case
State
:
:
Int
:
case
State
:
:
Int
:
if
(
isdigit
(
ch
)
or
ch
==
'+'
or
ch
==
'-'
)
if
(
(
ch
>=
'0'
and
ch
<=
'9'
)
or
ch
==
'+'
or
ch
==
'-'
)
state
=
State
::
Int
+
1
;
state
=
State
::
Int
+
1
;
else
else
state
=
start
=
restart
(
start
);
state
=
start
=
restart
(
start
);
...
@@ -552,7 +523,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -552,7 +523,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
{
retract
();
retract
();
result
=
CIFToken
::
Value
;
result
=
CIFToken
::
Value
;
// mTokenType = CIFValue::Int;
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
m_token_value
=
std
::
string_view
(
m_token_buffer
.
data
(),
m_token_buffer
.
size
());
}
}
else
else
...
@@ -603,80 +573,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -603,80 +573,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
}
}
break
;
break
;
// switch (ch & ~0x20)
// {
// case 'D': // data_
// state = State::Reserved + 10;
// break;
// case 'G':
// state = State::Reserved + 20; // global_
// break;
// case 'L':
// state = State::Reserved + 30; // loop_
// break;
// case 'S':
// state = State::Reserved + 40; // stop_ | save_
// break;
// default:
// state = start = restart(start);
// break;
// }
// break;
// case State::Reserved + 10: if ((ch & ~0x20) == 'A') ++state; else state = start = restart(start); break;
// case State::Reserved + 11: if ((ch & ~0x20) == 'T') ++state; else state = start = restart(start); break;
// case State::Reserved + 12: if ((ch & ~0x20) == 'A') ++state; else state = start = restart(start); break;
// case State::Reserved + 13: if ((ch & ~0x20) == '_') ++state; else state = start = restart(start); break;
// case State::Reserved + 14: if (is_non_blank(ch)) ++state; else state = start = restart(start); break;
// case State::Reserved + 15:
// if (not is_non_blank(ch))
// {
// retract();
// result = CIFToken::DATA;
// m_token_value = std::string_view(m_token_buffer.data() + 5, m_token_buffer.data() + m_token_buffer.size());
// }
// break;
// case State::Reserved + 20: if ((ch & ~0x20) == 'L') ++state; else state = start = restart(start); break;
// case State::Reserved + 21: if ((ch & ~0x20) == 'O') ++state; else state = start = restart(start); break;
// case State::Reserved + 22: if ((ch & ~0x20) == 'B') ++state; else state = start = restart(start); break;
// case State::Reserved + 23: if ((ch & ~0x20) == 'A') ++state; else state = start = restart(start); break;
// case State::Reserved + 24: if ((ch & ~0x20) == 'L') ++state; else state = start = restart(start); break;
// case State::Reserved + 25: if ((ch & ~0x20) == '_') ++state; else state = start = restart(start); break;
// case State::Reserved + 26: if (not is_non_blank(ch)) result = CIFToken::GLOBAL; else state = start = restart(start); break;
// case State::Reserved + 30: if ((ch & ~0x20) == 'O') ++state; else state = start = restart(start); break;
// case State::Reserved + 31: if ((ch & ~0x20) == 'O') ++state; else state = start = restart(start); break;
// case State::Reserved + 32: if ((ch & ~0x20) == 'P') ++state; else state = start = restart(start); break;
// case State::Reserved + 33: if ((ch & ~0x20) == '_') ++state; else state = start = restart(start); break;
// case State::Reserved + 34: if (not is_non_blank(ch)) result = CIFToken::LOOP; else state = start = restart(start); break;
// case State::Reserved + 40:
// if ((ch & ~0x20) == 'A')
// state = State::Reserved + 41;
// else if ((ch & ~0x20) == 'T')
// state = State::Reserved + 51;
// else
// state = start = restart(start);
// break;
// case State::Reserved + 41: if ((ch & ~0x20) == 'V') ++state; else state = start = restart(start); break;
// case State::Reserved + 42: if ((ch & ~0x20) == 'E') ++state; else state = start = restart(start); break;
// case State::Reserved + 43: if (is_non_blank(ch)) ++state; else state = start = restart(start); break;
// case State::Reserved + 44:
// if (not is_non_blank(ch))
// {
// retract();
// result = CIFToken::SAVE;
// m_token_value = std::string_view(m_token_buffer.data() + 5, m_token_buffer.data() + m_token_buffer.size());
// }
// break;
// case State::Reserved + 51: if ((ch & ~0x20) == 'O') ++state; else state = start = restart(start); break;
// case State::Reserved + 52: if ((ch & ~0x20) == 'P') ++state; else state = start = restart(start); break;
// case State::Reserved + 53: if ((ch & ~0x20) == '_') ++state; else state = start = restart(start); break;
// case State::Reserved + 54: if (not is_non_blank(ch)) result = CIFToken::STOP; else state = start = restart(start); break;
case
State
:
:
Value
:
case
State
:
:
Value
:
if
(
not
is_non_blank
(
ch
))
if
(
not
is_non_blank
(
ch
))
{
{
...
@@ -697,8 +593,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
...
@@ -697,8 +593,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
if
(
VERBOSE
>=
5
)
if
(
VERBOSE
>=
5
)
{
{
std
::
cerr
<<
get_token_name
(
result
);
std
::
cerr
<<
get_token_name
(
result
);
// if (mTokenType != CIFValue::Unknown)
// std::cerr << ' ' << get_value_name(mTokenType);
if
(
result
!=
CIFToken
::
Eof
)
if
(
result
!=
CIFToken
::
Eof
)
std
::
cerr
<<
" "
<<
std
::
quoted
(
m_token_value
);
std
::
cerr
<<
" "
<<
std
::
quoted
(
m_token_value
);
std
::
cerr
<<
std
::
endl
;
std
::
cerr
<<
std
::
endl
;
...
@@ -1085,4 +979,4 @@ void parser::produce_item(std::string_view category, std::string_view item, std:
...
@@ -1085,4 +979,4 @@ void parser::produce_item(std::string_view category, std::string_view item, std:
m_row
[
item
]
=
m_token_value
;
m_row
[
item
]
=
m_token_value
;
}
}
}
//
namespace
cif
}
// namespace cif
\ No newline at end of file
src/utilities.cpp
View file @
94a38ad4
...
@@ -40,7 +40,6 @@
...
@@ -40,7 +40,6 @@
#include <iostream>
#include <iostream>
#include <map>
#include <map>
#include <mutex>
#include <mutex>
#include <regex>
#include <sstream>
#include <sstream>
#include <thread>
#include <thread>
...
...
test/unit-v2-test.cpp
View file @
94a38ad4
...
@@ -2357,8 +2357,6 @@ _test.text ??
...
@@ -2357,8 +2357,6 @@ _test.text ??
BOOST_AUTO_TEST_CASE
(
output_test_1
)
BOOST_AUTO_TEST_CASE
(
output_test_1
)
{
{
cif
::
VERBOSE
=
5
;
auto
data1
=
R"(
auto
data1
=
R"(
data_Q
data_Q
loop_
loop_
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment