Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
libcifpp
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
libcifpp
Commits
19210df6
Unverified
Commit
19210df6
authored
Feb 08, 2022
by
Maarten L. Hekkelman
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix parsing mmCIF files with an unquoted string ??
parent
15c57307
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
81 additions
and
110 deletions
+81
-110
CMakeLists.txt
+1
-1
changelog
+7
-0
include/cif++/CifParser.hpp
+4
-4
src/CifParser.cpp
+69
-105
test/unit-test.cpp
+0
-0
No files found.
CMakeLists.txt
View file @
19210df6
...
...
@@ -25,7 +25,7 @@
cmake_minimum_required
(
VERSION 3.16
)
# set the project name
project
(
cifpp VERSION 3.0.
2
LANGUAGES CXX
)
project
(
cifpp VERSION 3.0.
4
LANGUAGES CXX
)
list
(
PREPEND CMAKE_MODULE_PATH
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/cmake"
)
...
...
changelog
View file @
19210df6
Version 3.0.4
- Fix in mmCIF parser, now correctly handles the unquoted string ??
Version 3.0.3
- Better configuration checks, e.g. for atomic
- Fixed a problem introduced in refactoring mmcif::Atom
...
...
@@ -17,6 +21,9 @@ Version 3.0.0
- Upgraded mmcif::Structure
- various other small fixes
Version 2.0.5
- Backporting updated CMakeLists.txt file
Version 2.0.4
- Reverted a too-strict test when reading cif files.
...
...
include/cif++/CifParser.hpp
View file @
19210df6
...
...
@@ -139,7 +139,7 @@ class SacParser
int
getNextChar
();
void
retract
();
void
restart
(
);
int
restart
(
int
start
);
CIFToken
getNextToken
();
void
match
(
CIFToken
token
);
...
...
@@ -181,8 +181,9 @@ class SacParser
eStateTextField
,
eStateFloat
=
100
,
eStateInt
=
110
,
// eStateNumericSuffix = 200,
eStateValue
=
300
eStateValue
=
300
,
eStateDATA
,
eStateSAVE
};
std
::
istream
&
mData
;
...
...
@@ -191,7 +192,6 @@ class SacParser
bool
mValidate
;
uint32_t
mLineNr
;
bool
mBol
;
int
mState
,
mStart
;
CIFToken
mLookahead
;
std
::
string
mTokenValue
;
CIFValueType
mTokenType
;
...
...
src/CifParser.cpp
View file @
19210df6
...
...
@@ -42,7 +42,7 @@ namespace cif
const
uint32_t
kMaxLineLength
=
132
;
const
uint8_t
kCharTraitsTable
[
128
]
=
{
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
14
,
15
,
14
,
14
,
14
,
15
,
15
,
14
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
// 2
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
10
,
15
,
15
,
15
,
15
,
// 3
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
15
,
// 4
...
...
@@ -151,23 +151,26 @@ void SacParser::retract()
mTokenValue
.
pop_back
();
}
void
SacParser
::
restart
()
int
SacParser
::
restart
(
int
start
)
{
int
result
=
0
;
while
(
not
mTokenValue
.
empty
())
retract
();
switch
(
mS
tart
)
switch
(
s
tart
)
{
case
eStateStart
:
mState
=
mStar
t
=
eStateFloat
;
resul
t
=
eStateFloat
;
break
;
case
eStateFloat
:
mState
=
mStar
t
=
eStateInt
;
resul
t
=
eStateInt
;
break
;
case
eStateInt
:
mState
=
mStar
t
=
eStateValue
;
resul
t
=
eStateValue
;
break
;
default
:
...
...
@@ -175,6 +178,8 @@ void SacParser::restart()
}
mBol
=
false
;
return
result
;
}
void
SacParser
::
match
(
SacParser
::
CIFToken
t
)
...
...
@@ -191,7 +196,7 @@ SacParser::CIFToken SacParser::getNextToken()
CIFToken
result
=
eCIFTokenUnknown
;
int
quoteChar
=
0
;
mState
=
mS
tart
=
eStateStart
;
int
state
=
eStateStart
,
s
tart
=
eStateStart
;
mBol
=
false
;
mTokenValue
.
clear
();
...
...
@@ -201,7 +206,7 @@ SacParser::CIFToken SacParser::getNextToken()
{
auto
ch
=
getNextChar
();
switch
(
mS
tate
)
switch
(
s
tate
)
{
case
eStateStart
:
if
(
ch
==
kEOF
)
...
...
@@ -209,27 +214,23 @@ SacParser::CIFToken SacParser::getNextToken()
else
if
(
ch
==
'\n'
)
{
mBol
=
true
;
mS
tate
=
eStateWhite
;
s
tate
=
eStateWhite
;
}
else
if
(
ch
==
' '
or
ch
==
'\t'
)
mS
tate
=
eStateWhite
;
s
tate
=
eStateWhite
;
else
if
(
ch
==
'#'
)
mState
=
eStateComment
;
else
if
(
ch
==
'.'
)
mState
=
eStateDot
;
state
=
eStateComment
;
else
if
(
ch
==
'_'
)
mS
tate
=
eStateTag
;
s
tate
=
eStateTag
;
else
if
(
ch
==
';'
and
mBol
)
mS
tate
=
eStateTextField
;
s
tate
=
eStateTextField
;
else
if
(
ch
==
'\''
or
ch
==
'"'
)
{
quoteChar
=
ch
;
mS
tate
=
eStateQuotedString
;
s
tate
=
eStateQuotedString
;
}
else
if
(
ch
==
'?'
)
mState
=
eStateQuestionMark
;
else
restart
(
);
state
=
start
=
restart
(
start
);
break
;
case
eStateWhite
:
...
...
@@ -237,7 +238,7 @@ SacParser::CIFToken SacParser::getNextToken()
result
=
eCIFTokenEOF
;
else
if
(
not
isspace
(
ch
))
{
mS
tate
=
eStateStart
;
s
tate
=
eStateStart
;
retract
();
mTokenValue
.
clear
();
}
...
...
@@ -248,7 +249,7 @@ SacParser::CIFToken SacParser::getNextToken()
case
eStateComment
:
if
(
ch
==
'\n'
)
{
mS
tate
=
eStateStart
;
s
tate
=
eStateStart
;
mBol
=
true
;
mTokenValue
.
clear
();
}
...
...
@@ -258,44 +259,19 @@ SacParser::CIFToken SacParser::getNextToken()
error
(
"invalid character in comment"
);
break
;
case
eStateQuestionMark
:
if
(
isNonBlank
(
ch
))
mState
=
eStateValue
;
else
{
retract
();
result
=
eCIFTokenValue
;
mTokenValue
.
clear
();
mTokenType
=
eCIFValueUnknown
;
}
break
;
case
eStateDot
:
if
(
isdigit
(
ch
))
mState
=
eStateFloat
+
2
;
else
if
(
isspace
(
ch
))
{
retract
();
result
=
eCIFTokenValue
;
mTokenType
=
eCIFValueInapplicable
;
}
else
mState
=
eStateValue
;
break
;
case
eStateTextField
:
if
(
ch
==
'\n'
)
mS
tate
=
eStateTextField
+
1
;
s
tate
=
eStateTextField
+
1
;
else
if
(
ch
==
kEOF
)
error
(
"unterminated textfield"
);
else
if
(
not
isAnyPrint
(
ch
)
and
cif
::
VERBOSE
>=
0
)
else
if
(
not
isAnyPrint
(
ch
))
// error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")");
std
::
cerr
<<
"invalid character in text field '"
<<
std
::
string
({
static_cast
<
char
>
(
ch
)})
<<
"' ("
<<
ch
<<
") line: "
<<
mLineNr
<<
std
::
endl
;
break
;
case
eStateTextField
+
1
:
if
(
isTextLead
(
ch
)
or
ch
==
' '
or
ch
==
'\t'
)
mS
tate
=
eStateTextField
;
s
tate
=
eStateTextField
;
else
if
(
ch
==
';'
)
{
assert
(
mTokenValue
.
length
()
>=
2
);
...
...
@@ -313,7 +289,7 @@ SacParser::CIFToken SacParser::getNextToken()
if
(
ch
==
kEOF
)
error
(
"unterminated quoted string"
);
else
if
(
ch
==
quoteChar
)
mS
tate
=
eStateQuotedStringQuote
;
s
tate
=
eStateQuotedStringQuote
;
else
if
(
not
isAnyPrint
(
ch
))
error
(
"invalid character in quoted string"
);
break
;
...
...
@@ -331,7 +307,7 @@ SacParser::CIFToken SacParser::getNextToken()
else
if
(
ch
==
quoteChar
)
;
else
if
(
isAnyPrint
(
ch
))
mS
tate
=
eStateQuotedString
;
s
tate
=
eStateQuotedString
;
else
if
(
ch
==
kEOF
)
error
(
"unterminated quoted string"
);
else
...
...
@@ -349,12 +325,12 @@ SacParser::CIFToken SacParser::getNextToken()
case
eStateFloat
:
if
(
ch
==
'+'
or
ch
==
'-'
)
{
mS
tate
=
eStateFloat
+
1
;
s
tate
=
eStateFloat
+
1
;
}
else
if
(
isdigit
(
ch
))
mS
tate
=
eStateFloat
+
1
;
s
tate
=
eStateFloat
+
1
;
else
restart
(
);
state
=
start
=
restart
(
start
);
break
;
case
eStateFloat
+
1
:
...
...
@@ -362,9 +338,9 @@ SacParser::CIFToken SacParser::getNextToken()
// mState = eStateNumericSuffix;
// else
if
(
ch
==
'.'
)
mS
tate
=
eStateFloat
+
2
;
s
tate
=
eStateFloat
+
2
;
else
if
(
tolower
(
ch
)
==
'e'
)
mS
tate
=
eStateFloat
+
3
;
s
tate
=
eStateFloat
+
3
;
else
if
(
isWhite
(
ch
)
or
ch
==
kEOF
)
{
retract
();
...
...
@@ -372,16 +348,13 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType
=
eCIFValueInt
;
}
else
restart
(
);
state
=
start
=
restart
(
start
);
break
;
// parsed '.'
case
eStateFloat
+
2
:
// if (ch == '(') // numeric???
// mState = eStateNumericSuffix;
// else
if
(
tolower
(
ch
)
==
'e'
)
mS
tate
=
eStateFloat
+
3
;
s
tate
=
eStateFloat
+
3
;
else
if
(
isWhite
(
ch
)
or
ch
==
kEOF
)
{
retract
();
...
...
@@ -389,30 +362,27 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType
=
eCIFValueFloat
;
}
else
restart
(
);
state
=
start
=
restart
(
start
);
break
;
// parsed 'e'
case
eStateFloat
+
3
:
if
(
ch
==
'-'
or
ch
==
'+'
)
mS
tate
=
eStateFloat
+
4
;
s
tate
=
eStateFloat
+
4
;
else
if
(
isdigit
(
ch
))
mS
tate
=
eStateFloat
+
5
;
s
tate
=
eStateFloat
+
5
;
else
restart
(
);
state
=
start
=
restart
(
start
);
break
;
case
eStateFloat
+
4
:
if
(
isdigit
(
ch
))
mS
tate
=
eStateFloat
+
5
;
s
tate
=
eStateFloat
+
5
;
else
restart
(
);
state
=
start
=
restart
(
start
);
break
;
case
eStateFloat
+
5
:
// if (ch == '(')
// mState = eStateNumericSuffix;
// else
if
(
isWhite
(
ch
)
or
ch
==
kEOF
)
{
retract
();
...
...
@@ -420,14 +390,14 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType
=
eCIFValueFloat
;
}
else
restart
(
);
state
=
start
=
restart
(
start
);
break
;
case
eStateInt
:
if
(
isdigit
(
ch
)
or
ch
==
'+'
or
ch
==
'-'
)
mS
tate
=
eStateInt
+
1
;
s
tate
=
eStateInt
+
1
;
else
restart
(
);
state
=
start
=
restart
(
start
);
break
;
case
eStateInt
+
1
:
...
...
@@ -438,35 +408,11 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType
=
eCIFValueInt
;
}
else
restart
(
);
state
=
start
=
restart
(
start
);
break
;
// case eStateNumericSuffix:
// if (isdigit(ch))
// mState = eStateNumericSuffix + 1;
// else
// restart();
// break;
//
// case eStateNumericSuffix + 1:
// if (ch == ')')
// {
// result = eCIFTokenValue;
// mTokenType = eCIFValueNumeric;
// }
// else if (not isdigit(ch))
// restart();
// break;
case
eStateValue
:
if
(
isNonBlank
(
ch
))
mState
=
eStateValue
+
1
;
else
error
(
"invalid character at this position"
);
break
;
case
eStateValue
+
1
:
if
(
ch
==
'_'
)
// first _, check for keywords
if
(
ch
==
'_'
)
{
std
::
string
s
=
toLowerCopy
(
mTokenValue
);
...
...
@@ -476,23 +422,40 @@ SacParser::CIFToken SacParser::getNextToken()
result
=
eCIFTokenSTOP
;
else
if
(
s
==
"loop_"
)
result
=
eCIFTokenLOOP
;
else
if
(
s
==
"data_"
or
s
==
"save_"
)
mState
=
eStateValue
+
2
;
else
if
(
s
==
"data_"
)
{
state
=
eStateDATA
;
continue
;
}
else
if
(
not
isNonBlank
(
ch
))
else
if
(
s
==
"save_"
)
{
state
=
eStateSAVE
;
continue
;
}
}
if
(
result
==
eCIFTokenUnknown
and
not
isNonBlank
(
ch
))
{
retract
();
result
=
eCIFTokenValue
;
mTokenType
=
eCIFValueString
;
if
(
mTokenValue
==
"."
)
mTokenType
=
eCIFValueInapplicable
;
else
if
(
mTokenValue
==
"?"
)
{
mTokenType
=
eCIFValueUnknown
;
mTokenValue
.
clear
();
}
}
break
;
case
eStateValue
+
2
:
case
eStateDATA
:
case
eStateSAVE
:
if
(
not
isNonBlank
(
ch
))
{
retract
();
if
(
tolower
(
mTokenValue
[
0
])
==
'd'
)
if
(
state
==
eStateDATA
)
result
=
eCIFTokenDATA
;
else
result
=
eCIFTokenSAVE
;
...
...
@@ -521,6 +484,7 @@ SacParser::CIFToken SacParser::getNextToken()
return
result
;
}
DatablockIndex
SacParser
::
indexDatablocks
()
{
DatablockIndex
index
;
...
...
test/unit-test.cpp
View file @
19210df6
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment