diff --git a/dataset_utils/Readme.md b/dataset_utils/Readme.md new file mode 100644 index 00000000..41396633 --- /dev/null +++ b/dataset_utils/Readme.md @@ -0,0 +1,8 @@ +# Dataset Utils + +This directory contains utility functions related to Datasets. + +Current Implemented features + + * Convert CSV files to JSON([Issue](https://github.com/mlpack/models/issues/22)) + * Convert CSV files to XML([Issue](https://github.com/mlpack/models/issues/22)) diff --git a/dataset_utils/convert.cpp b/dataset_utils/convert.cpp new file mode 100644 index 00000000..0aca549d --- /dev/null +++ b/dataset_utils/convert.cpp @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +using namespace boost::property_tree; +using namespace boost; + +class Convert +{ + auto tokenize(std::string& line) + { + std::vector col_names; + tokenizer > tk(line, escaped_list_separator()); + for (tokenizer >::iterator i(tk.begin()); i != tk.end(); ++i) + col_names.push_back(*i); + return col_names; + } + + auto create_XML(std::vector& tags, std::vector rows) + { + static int ctr; + ptree XMLobjectL; + std::string tag, value; + + for (auto i : boost::combine(tags, rows)) + { + //tag contains tags, value contains corresponding values + boost::tie(tag, value) = i; + XMLobjectL.put("annotation.object." + tag, value); + } + + write_xml(std::to_string(ctr) + ".xml", XMLobjectL, std::locale(), + xml_writer_make_settings(' ', 1u)); + ctr++; + } + + auto create_JSON(std::vector& tags, std::vector rows) + { + static int ctr; + ptree XMLobjectL; + std::string tag, value; + + for (auto i : boost::combine(tags, rows)) + { + //tag contains tags, value contains corresponding values + boost::tie(tag, value) = i; + XMLobjectL.put("annotation.object." + tag, value); + } + + write_json(std::to_string(ctr) + ".json", XMLobjectL); + ctr++; + } + + void convertHelper(std::string path, std::string to) + { + //static int ctr; + static std::unordered_map fileNames; + std::vector tags; + std::vector rows; + std::ifstream file(path); + std::string line; + std::vector col_names; + + std::getline(file, line); + tags = tokenize(line); + + if (to == "xml") + { + while (std::getline(file, line)) + { + create_XML(tags, tokenize(line)); + } + } + else if (to == "json") + { + while (std::getline(file, line)) + { + create_JSON(tags, tokenize(line)); + } + } + } + +public: + void convert(std::string path, std::string to) + { + convertHelper(path, to); + } +}; + +// How to invoke +/* +int main() { + Convert foo; + foo.convert("path_to_csv.csv", "xml"); + foo.convert("path_to_csv.csv", "json"); +}*/