creativity  v1.3.0
Agent-based model of creativity and piracy
CSVParser.hpp
1 #pragma once
2 #include <eris/noncopyable.hpp>
3 #include <string>
4 #include <unordered_set>
5 #include <unordered_map>
6 #include <vector>
7 #include <fstream>
8 
9 namespace creativity { namespace data {
10 
32 class CSVParser : private eris::noncopyable { // Primitive comma-separated-value file parser (inherits privately from eris::noncopyable).
33  public:
35  CSVParser() = delete; // Not default constructible.
41  explicit CSVParser(const std::string &filename); // Creates a parser for `filename`; the header names are read during construction.
42 
49  const std::unordered_set<std::string>& skip() const; // Returns the set of fields to skip; all non-numeric fields must be added to it before attempting to read data.
50 
52  void skip(const std::string &name); // Presumably adds `name` to the set of fields to skip (counterpart of dontSkip()) -- confirm in the implementation.
53 
55  void dontSkip(const std::string &name); // Removes the given field name from the list of header fields to skip, if present.
56 
62  const std::vector<std::string>& header() const; // Returns the vector of header names read during construction.
63 
70  const std::vector<std::string>& fields() const; // Returns the vector of field names corresponding to a readRow() call (the header minus skipped fields).
71 
76  bool hasField(const std::string &field) const; // Returns true if fields() contains the requested field name.
77 
82  size_t fieldPosition(const std::string &field) const; // Returns the column index of the given field.
83 
91  double field(const std::string &field) const; // Returns the value (in the current row) of the given field.
92 
99  // NOTE(review): the generated cross-reference text documents a public member `size_t allow_missing_values` defined at line 98, which does not appear in this listing -- confirm against the real header.
110  bool readRow(); // Reads the next line of the CSV file, storing it in row() and replacing what was previously stored there; the iterator treats a false return as end-of-data.
111 
113  bool eof() const; // Returns true if the parser has reached the end of the file.
114 
118  const std::vector<double>& row() const { return row_; } // Accesses the most-recently-read row of the file.
119 
121  const std::unordered_map<std::string, std::string>& rowSkipped() const { return row_skipped_; } // Accesses any skipped fields in the most-recently-read row.
122 
124  const size_t& lineNumber() const { return lineno_; } // Accesses the most-recently-read line number.
125 
126  // forward declaration
127  class iterator;
128 
130  iterator begin(); // Returns an iterator that reads through the file.
131 
133  iterator end(); // Returns a past-the-end iterator.
134 
135  private:
136  // Split a string by , and return a vector of elements
137  static std::vector<std::string> split(const std::string &csr);
138 
139  void updateFields(); // regenerate fields, omitting things in skip_
140  std::unordered_set<std::string> skip_; // things to skip in header_ when making fields_
141  std::vector<std::string> header_; // the header read from the file
142  std::vector<std::string> fields_; // the header fields remaining after skipping skip_
143  mutable std::unordered_map<std::string, unsigned> field_pos_; // Map from field name to row position (created from fields_ on-demand)
144  std::fstream f_; // File stream for the CSV data (presumably opened on `filename` by the constructor)
145  size_t lineno_; // Tracks the current line number
146  std::vector<double> row_; // The most-recently-read row (reused)
147  std::unordered_map<std::string, std::string> row_skipped_; // Any skipped fields on most-recently-read row
148 };
149 
152 class CSVParser::iterator final : public std::iterator<std::input_iterator_tag, const std::vector<double>, long> { // Iterator class that allows iterating through the file. NOTE(review): std::iterator is deprecated since C++17.
153  public:
155  reference operator*() const { return csv_.row(); } // Dereferences the iterator, returning the parser's current row (the row storage belongs to the parser, not the iterator).
156 
158  pointer operator->() const { return &csv_.row(); } // Dereferences the iterator, returning a pointer to the parser's current row.
159 
163  iterator& operator++() { end_ = not csv_.readRow(); return *this; } // Increments the iterator by reading the next row; becomes past-the-end when readRow() returns false.
164 
169  bool operator==(const iterator &other) const { // Const-qualified (does not modify the iterator) so that const iterators can be compared.
170  return
171  &csv_ == &(other.csv_) // same CSV object
172  and
173  end_ == other.end_; // Both are end (either by calling end() or by hitting eof)
174  }
175 
177  bool operator!=(const iterator &other) const { return !(*this == other); } // Returns the negation of the == operator.
178 
179  private:
180  iterator() = delete; // An iterator must always be bound to a parser.
181  CSVParser &csv_; // The parser this iterator reads from
182  bool end_; // true if this is a past-the-end iterator
183  iterator(CSVParser &csv, bool end) : csv_(csv), end_(end) { if (!end_) csv_.readRow(); } // A non-end iterator reads a row immediately (the readRow() result is not checked here).
184  friend class CSVParser; // Only CSVParser (via begin()/end()) can construct iterators.
185 };
186 
187 }}
pointer operator->()
Dereferences the iterator, returning the current CSVParser row pointer.
Definition: CSVParser.hpp:158
Primary namespace for all Creativity library code.
Definition: config.hpp:4
bool operator==(const iterator &other)
Returns true if both iterators refer to the same CSVParser object and have the same end state (both past-the-end, or both not).
Definition: CSVParser.hpp:169
CSVParser()=delete
Not default constructible.
reference operator*()
Dereferences the iterator, returning the current CSVParser row.
Definition: CSVParser.hpp:155
const std::vector< std::string > & header() const
Returns the vector of header names read during construction.
bool readRow()
Reads the next line of the CSV file, storing it in row(), replacing what was previously stored there...
double field(const std::string &field) const
Returns the value (in the current row) of the given field.
size_t fieldPosition(const std::string &field) const
Returns the column index of the given field.
const size_t & lineNumber() const
Accesses the most-recently-read line number.
Definition: CSVParser.hpp:124
iterator end()
Returns a past-the-end iterator.
bool eof() const
Returns true if the parser has reached the end of the file.
bool hasField(const std::string &field) const
Returns true if fields() contains the requested field name.
const std::unordered_map< std::string, std::string > & rowSkipped() const
Accesses any skipped fields in the most-recently-read row.
Definition: CSVParser.hpp:121
const std::vector< std::string > & fields() const
Returns the vector of field names corresponding to a readRow() call.
iterator & operator++()
Increments the iterator, reading the next row of the file.
Definition: CSVParser.hpp:163
const std::vector< double > & row() const
Accesses the most-recently-read row of the file.
Definition: CSVParser.hpp:118
Primitive comma-separated-value file parser.
Definition: CSVParser.hpp:32
size_t allow_missing_values
The number of values that may be missing from the end of a data row for subsequent read rows...
Definition: CSVParser.hpp:98
iterator begin()
Returns an iterator that reads through the file.
void dontSkip(const std::string &name)
Removes the given field name from the list of header fields to skip, if present.
Iterator class that allows iterating through the file.
Definition: CSVParser.hpp:152
bool operator!=(const iterator &other)
Returns the negation of the == operator.
Definition: CSVParser.hpp:177
const std::unordered_set< std::string > & skip() const
The set of fields to skip; all non-numeric fields must be added here before attempting to read a data...