documenting parser

2aebfc29 · Maarten L. Hekkelman · 26a5410b · 2aebfc29 · 2aebfc29
Commit 2aebfc29 authored Sep 06, 2023 by Maarten L. Hekkelman
Hide whitespace changes
Inline Side-by-side

Showing with 79 additions and 5 deletions

include/cif++/model.hpp
+1 -1

include/cif++/parser.hpp
+78 -4

No files found.
--- a/include/cif++/model.hpp
+++ b/include/cif++/model.hpp
@@ -964,7 +964,7 @@ class structure
 		return get_residue(atom.get_label_asym_id(), atom.get_label_comp_id(), atom.get_label_seq_id(), atom.get_auth_seq_id());
 	}

-	// Actions
+	// Actions. Originally a lot more actions were expected here

 	/// \brief Remove atom @a a
 	void remove_atom(atom &a)

--- a/include/cif++/parser.hpp
+++ b/include/cif++/parser.hpp
@@ -30,14 +30,22 @@

 #include <map>

+/**
+ * @file parser.hpp
+ * 
+ * This file contains the declaration of an mmCIF parser
+ */
+
 namespace cif
 {

 // --------------------------------------------------------------------

+/** Exception that is thrown when the mmCIF file contains a parsing error */
 class parse_error : public std::runtime_error
 {
  public:
+	/// \brief constructor
 	parse_error(uint32_t line_nr, const std::string &message)
 		: std::runtime_error("parse error at line " + std::to_string(line_nr) + ": " + message)
 	{
@@ -46,57 +54,83 @@ class parse_error : public std::runtime_error

 // --------------------------------------------------------------------

+/**
+ * @brief The sac_parser is similar to a SAX parser (Simple API for XML;
+ * in our case it is a Simple API for CIF)
+ * 
+ * This is a hand-crafted, optimised parser for reading cif files.
+ * Both cif 1.0 and cif 1.1 are supported, but version 2.0 is not.
+ * That means that the content of files must contain only
+ * ASCII characters. Anything else will generate an error.
+ * 
+ * This class is an abstract base class. Derived classes should
+ * implement the produce_ methods.
+ */
+
 // TODO: Need to implement support for transformed long lines

 class sac_parser
 {
  public:
+	/** @cond */
 	using datablock_index = std::map<std::string, std::size_t>;

 	virtual ~sac_parser() = default;
+	/** @endcond */

+	/// \brief The parser only supports ASCII so we can
+	/// create a table with character properties.
 	enum CharTraitsMask : uint8_t
 	{
-		kOrdinaryMask = 1 << 0,
-		kNonBlankMask = 1 << 1,
-		kTextLeadMask = 1 << 2,
-		kAnyPrintMask = 1 << 3
+		kOrdinaryMask = 1 << 0,	///< The character is in the Ordinary class
+		kNonBlankMask = 1 << 1,	///< The character is in the NonBlank class
+		kTextLeadMask = 1 << 2,	///< The character is in the TextLead class
+		kAnyPrintMask = 1 << 3	///< The character is in the AnyPrint class
 	};

+	/// \brief Return true if the character @a ch is a *space* character
 	static constexpr bool is_space(int ch)
 	{
 		return ch == ' ' or ch == '\t' or ch == '\r' or ch == '\n';
 	}

+	/// \brief Return true if the character @a ch is a *white* character
 	static constexpr bool is_white(int ch)
 	{
 		return is_space(ch) or ch == '#';
 	}

+	/// \brief Return true if the character @a ch is an *ordinary* character
 	static constexpr bool is_ordinary(int ch)
 	{
 		return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
 	}

+	/// \brief Return true if the character @a ch is a *non_blank* character
 	static constexpr bool is_non_blank(int ch)
 	{
 		return ch > 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
 	}

+	/// \brief Return true if the character @a ch is a *text_lead* character
 	static constexpr bool is_text_lead(int ch)
 	{
 		return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
 	}

+	/// \brief Return true if the character @a ch is an *any_print* character
 	static constexpr bool is_any_print(int ch)
 	{
 		return ch == '\t' or
 		       (ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
 	}

+	/// \brief Return true if the string in @a text can safely be written without quotation
 	static bool is_unquoted_string(std::string_view text);

  protected:
+	/** @cond */
+
 	static constexpr uint8_t kCharTraitsTable[128] = {
 		//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
 		14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, //	2
@@ -152,17 +186,45 @@ class sac_parser

 	void match(CIFToken token);

+	/** @endcond */
+
  public:
+
+	/** \brief Parse only a single datablock in the string @a datablock
+	 * The start of the datablock is first located and then data
+	 * is parsed up until the next start of a datablock or the end of
+	 * the data.
+	 * */
 	bool parse_single_datablock(const std::string &datablock);

+	/** \brief Return an index for all the datablocks found; that is,
+	 * the index will contain the names and offsets for each.
+	 */
 	datablock_index index_datablocks();

+	/**
+	 * @brief Parse the datablock named @a datablock
+	 * 
+	 * This will first lookup the datablock's offset in the index @a index
+	 * and then start parsing from that location until the next datablock.
+	 * 
+	 * @param datablock Name of the datablock to parse
+	 * @param index The index created using index_datablocks
+	 * @return true If the datablock was found
+	 * @return false If the datablock was not found
+	 */
 	bool parse_single_datablock(const std::string &datablock, const datablock_index &index);

+	/**
+	 * @brief Parse the file
+	 * 
+	 */
 	void parse_file();

  protected:

+	/** @cond */
+
 	sac_parser(std::istream &is, bool init = true);

 	void parse_global();
@@ -222,19 +284,29 @@ class sac_parser
 	// token buffer
 	std::vector<char> m_token_buffer;
 	std::string_view m_token_value;
+
+	/** @endcond */
 };

 // --------------------------------------------------------------------

+/**
+ * @brief An actual implementation of a sac_parser generating data in a file
+ * 
+ * This parser will create the cif::file, cif::datablock and cif::category
+ * objects required to contain all data
+ */
 class parser : public sac_parser
 {
  public:
+	/// \brief constructor, generates data into @a file from @a is
 	parser(std::istream &is, file &file)
 		: sac_parser(is)
 		, m_file(file)
 	{
 	}

+	/** @cond */
 	void produce_datablock(std::string_view name) override;

 	void produce_category(std::string_view name) override;
@@ -248,6 +320,8 @@ class parser : public sac_parser
 	datablock *m_datablock = nullptr;
 	category *m_category = nullptr;
 	row_handle m_row;
+
+	/** @endcond */
 };

 } // namespace cif