diff --git a/README b/README deleted file mode 100644 index 117ca56..0000000 --- a/README +++ /dev/null @@ -1,37 +0,0 @@ -Purpose: - -Numpy offers the save method for easy saving of arrays into .npy and savez for zipping multiple .npy arrays together into a .npz file. cnpy lets you read and write to these formats in C++. The motivation comes from scientific programming where large amounts of data are generated in C++ and analyzed in Python. Writing to .npy has the advantage of using low-level C++ I/O (fread and fwrite) for speed and binary format for size. The .npy file header takes care of specifying the size, shape, and data type of the array, so specifying the format of the data is unnecessary. Loading data written in numpy formats into C++ is equally simple, but requires you to type-cast the loaded data to the type of your choice. - -Installation: - -Default installation directory is /usr/local. To specify a different directory, add -DCMAKE_INSTALL_PREFIX=/path/to/install/dir to the cmake invocation in step 4. - -1. get cmake at www.cmake.org -2. create a build directory, say $HOME/build -3. cd $HOME/build -4. cmake /path/to/cnpy -5. make -6. make install - -Using: - -To use, #include"cnpy.h" in your source code. Compile the source code mycode.cpp as - -g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy - -Description: - -There are two functions for writing data: npy_save, npz_save. - -There are 3 functions for reading. npy_load will load a .npy file. npz_load(fname) will load a .npz and return a dictionary of NpyArray structues. npz_load(fname,varname) will load and return the NpyArray for data varname from the specified .npz file. -Note that NpyArray allocates char* data using new[] and *will not* delete the data upon the NpyArray destruction. You are responsible for delete the data yourself. - -The data structure for loaded data is below. Data is loaded into a a raw byte array. The array shape and word size are read from the npy header. 
You are responsible for casting/copying the data to its intended data type. - -struct NpyArray { - char* data; - std::vector shape; - unsigned int word_size; -}; - -See example1.cpp for examples of how to use the library. example1 will also be build during cmake installation. diff --git a/README.md b/README.md new file mode 100644 index 0000000..ec962d0 --- /dev/null +++ b/README.md @@ -0,0 +1,38 @@ +## Purpose + +Numpy offers the save method for easy saving of arrays into .npy and savez for zipping multiple .npy arrays together into a .npz file. cnpy lets you read and write to these formats in C++. The motivation comes from scientific programming where large amounts of data are generated in C++ and analyzed in Python. Writing to .npy has the advantage of using low-level C++ I/O (fread and fwrite) for speed and binary format for size. The .npy file header takes care of specifying the size, shape, and data type of the array, so specifying the format of the data is unnecessary. Loading data written in numpy formats into C++ is equally simple, but requires you to type-cast the loaded data to the type of your choice. + +## Installation + +Default installation directory is `/usr/local`. To specify a different directory, add `-DCMAKE_INSTALL_PREFIX=/path/to/install/dir` to the cmake invocation in step 4. + + get cmake at www.cmake.org + create a build directory, say $HOME/build + cd $HOME/build + cmake /path/to/cnpy + make + make install + +## Using + +To use, `#include"cnpy.h"` in your source code. Compile the source code mycode.cpp as + + g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy + +## Description + +There are two functions for writing data: `npy_save`, `npz_save`. + +There are 3 functions for reading. `npy_load` will load a .npy file. `npz_load(fname)` will load a .npz and return a dictionary of `NpyArray` structures. `npz_load(fname,varname)` will load and return the `NpyArray` for data varname from the specified .npz file. 
+Note that `NpyArray` allocates `char*` data using `new[]` and *will not* delete the data upon the `NpyArray` destruction. You are responsible for deleting the data yourself. +The `order` argument when calling `npy_save` or `npz_save` can take the values "C" or "F", indicating C or Fortran order respectively. + +The data structure for loaded data is below. Data is loaded into a raw byte array. The array shape and word size are read from the npy header. You are responsible for casting/copying the data to its intended data type. + + struct NpyArray { + char* data; + std::vector shape; + unsigned int word_size; + }; + +See example1.cpp for examples of how to use the library. example1 will also be built during cmake installation. diff --git a/cnpy.h b/cnpy.h index 8229b46..6b04050 100644 --- a/cnpy.h +++ b/cnpy.h @@ -35,7 +35,7 @@ namespace cnpy { char BigEndianTest(); char map_type(const std::type_info& t); - template std::vector create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims); + template std::vector create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims, std::string order); void parse_npy_header(FILE* fp,unsigned int& word_size, unsigned int*& shape, unsigned int& ndims); void parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& global_header_size, unsigned int& global_header_offset); npz_t npz_load(std::string fname); @@ -61,7 +61,7 @@ namespace cnpy { return s.str(); } - template void npy_save(std::string fname, const T* data, const unsigned int* shape, const unsigned int ndims, std::string mode = "w") { + template void npy_save(std::string fname, const T* data, const unsigned int* shape, const unsigned int ndims, std::string mode = "w", std::string order = "C") { FILE* fp = NULL; if(mode == "a") fp = fopen(fname.c_str(),"r+b"); @@ -90,7 +90,7 @@ namespace cnpy { tmp_shape[0] += shape[0]; fseek(fp,0,SEEK_SET); - std::vector header = create_npy_header(data,tmp_shape,ndims); + std::vector 
header = create_npy_header(data,tmp_shape,ndims,order); fwrite(&header[0],sizeof(char),header.size(),fp); fseek(fp,0,SEEK_END); @@ -98,7 +98,7 @@ namespace cnpy { } else { fp = fopen(fname.c_str(),"wb"); - std::vector header = create_npy_header(data,shape,ndims); + std::vector header = create_npy_header(data,shape,ndims,order); fwrite(&header[0],sizeof(char),header.size(),fp); } @@ -109,7 +109,7 @@ namespace cnpy { fclose(fp); } - template void npz_save(std::string zipname, std::string fname, const T* data, const unsigned int* shape, const unsigned int ndims, std::string mode = "w") + template void npz_save(std::string zipname, std::string fname, const T* data, const unsigned int* shape, const unsigned int ndims, std::string mode = "w", std::string order = "C") { //first, append a .npy to the fname fname += ".npy"; @@ -138,7 +138,7 @@ namespace cnpy { fp = fopen(zipname.c_str(),"wb"); } - std::vector npy_header = create_npy_header(data,shape,ndims); + std::vector npy_header = create_npy_header(data,shape,ndims,order); unsigned long nels = 1; for (int m=0; m std::vector create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims) { + template std::vector create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims, std::string order) { std::vector dict; dict += "{'descr': '"; dict += BigEndianTest(); dict += map_type(typeid(T)); dict += tostring(sizeof(T)); - dict += "', 'fortran_order': False, 'shape': ("; + if (order == "F") { + dict += "', 'fortran_order': True, 'shape': ("; + } else { + dict += "', 'fortran_order':False, 'shape': ("; + } dict += tostring(shape[0]); for(int i = 1;i < ndims;i++) { dict += ", "; diff --git a/example1.cpp b/example1.cpp index 7f1ab5d..24fe786 100644 --- a/example1.cpp +++ b/example1.cpp @@ -17,7 +17,7 @@ int main() //save it to file const unsigned int shape[] = {Nz,Ny,Nx}; - cnpy::npy_save("arr1.npy",data,shape,3,"w"); + cnpy::npy_save("arr1.npy",data,shape,3,"w","F"); // Save with 
Fortran order //load it into a new array cnpy::NpyArray arr = cnpy::npy_load("arr1.npy"); @@ -30,16 +30,16 @@ int main() //append the same data to file //npy array on file now has shape (Nz+Nz,Ny,Nx) - cnpy::npy_save("arr1.npy",data,shape,3,"a"); + cnpy::npy_save("arr1.npy",data,shape,3,"a","C"); // Save with C order //now write to an npz file //non-array variables are treated as 1D arrays with 1 element double myVar1 = 1.2; char myVar2 = 'a'; unsigned int shape2[] = {1}; - cnpy::npz_save("out.npz","myVar1",&myVar1,shape2,1,"w"); //"w" overwrites any existing file - cnpy::npz_save("out.npz","myVar2",&myVar2,shape2,1,"a"); //"a" appends to the file we created above - cnpy::npz_save("out.npz","arr1",data,shape,3,"a"); //"a" appends to the file we created above + cnpy::npz_save("out.npz","myVar1",&myVar1,shape2,1,"w", "C"); //"w" overwrites any existing file + cnpy::npz_save("out.npz","myVar2",&myVar2,shape2,1,"a", "C"); //"a" appends to the file we created above + cnpy::npz_save("out.npz","arr1",data,shape,3,"a", "C"); //"a" appends to the file we created above //load a single var from the npz file cnpy::NpyArray arr2 = cnpy::npz_load("out.npz","arr1");