Skip to content

Commit

Permalink
Merge pull request #123 from paulsengroup/feature/better-api
Browse files Browse the repository at this point in the history
Make API more ergonomic
  • Loading branch information
robomics authored Nov 13, 2024
2 parents b0a5e60 + a28f14f commit 1fd4c6d
Show file tree
Hide file tree
Showing 11 changed files with 76 additions and 171 deletions.
9 changes: 6 additions & 3 deletions src/cooler_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <cstdint>
#include <filesystem>
#include <hictk/cooler/cooler.hpp>
#include <hictk/file.hpp>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
#include <hictk/type_traits.hpp>
Expand Down Expand Up @@ -113,8 +114,8 @@ void CoolerFileWriter::add_pixels(const nb::object &df) {
var);
}

void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str,
std::size_t chunk_size, std::size_t update_freq) {
hictk::File CoolerFileWriter::finalize(std::string_view log_lvl_str, std::size_t chunk_size,
std::size_t update_freq) {
if (_finalized) {
throw std::runtime_error(
fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _path));
Expand Down Expand Up @@ -151,6 +152,8 @@ void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str,
_w.reset();
std::filesystem::remove(sclr_path); // NOLINT
// NOLINTEND(*-unchecked-optional-access)

return hictk::File{_path.string()};
}

hictk::cooler::SingleCellFile CoolerFileWriter::create_file(std::string_view path,
Expand Down Expand Up @@ -212,7 +215,7 @@ void CoolerFileWriter::bind(nb::module_ &m) {
// NOLINTBEGIN(*-avoid-magic-numbers)
writer.def("finalize", &hictkpy::CoolerFileWriter::finalize, nb::arg("log_lvl") = "WARN",
nb::arg("chunk_size") = 500'000, nb::arg("update_frequency") = 10'000'000,
"Write interactions to file.");
"Write interactions to file.", nb::rv_policy::move);
// NOLINTEND(*-avoid-magic-numbers)
}
} // namespace hictkpy
34 changes: 19 additions & 15 deletions src/file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,9 @@ bool is_cooler(const std::filesystem::path &uri) {

bool is_hic(const std::filesystem::path &uri) { return hictk::hic::utils::is_hic_file(uri); }

static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1,
std::string_view range2, std::string_view normalization,
static hictkpy::PixelSelector fetch(const hictk::File &f, std::optional<std::string_view> range1,
std::optional<std::string_view> range2,
std::optional<std::string_view> normalization,
std::string_view count_type, bool join,
std::string_view query_type) {
if (count_type != "float" && count_type != "int") {
Expand All @@ -96,36 +97,39 @@ static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range
throw std::runtime_error("query_type should be either UCSC or BED");
}

if (normalization != "NONE") {
const hictk::balancing::Method normalization_method{normalization.value_or("NONE")};

if (normalization_method != hictk::balancing::Method::NONE()) {
count_type = "float";
}

if (range1.empty()) {
assert(range2.empty());
if (!range1.has_value() || range1->empty()) {
assert(!range2.has_value() || range2->empty());
return std::visit(
[&](const auto &ff) {
auto sel = ff.fetch(hictk::balancing::Method{normalization});
auto sel = ff.fetch(normalization_method);
using SelT = decltype(sel);
return hictkpy::PixelSelector(std::make_shared<const SelT>(std::move(sel)), count_type,
join);
},
f.get());
}

if (range2.empty()) {
if (!range2.has_value() || range2->empty()) {
range2 = range1;
}

const auto query_type_ =
query_type == "UCSC" ? hictk::GenomicInterval::Type::UCSC : hictk::GenomicInterval::Type::BED;
const auto gi1 = hictk::GenomicInterval::parse(f.chromosomes(), std::string{range1}, query_type_);
const auto gi2 = hictk::GenomicInterval::parse(f.chromosomes(), std::string{range2}, query_type_);
const auto gi1 =
hictk::GenomicInterval::parse(f.chromosomes(), std::string{*range1}, query_type_);
const auto gi2 =
hictk::GenomicInterval::parse(f.chromosomes(), std::string{*range2}, query_type_);

return std::visit(
[&](const auto &ff) {
// Workaround bug fixed in https://github.com/paulsengroup/hictk/pull/158
auto sel = ff.fetch(fmt::format(FMT_STRING("{}"), gi1), fmt::format(FMT_STRING("{}"), gi2),
hictk::balancing::Method(normalization));
auto sel = ff.fetch(gi1.chrom().name(), gi1.start(), gi1.end(), gi2.chrom().name(),
gi2.start(), gi2.end(), normalization_method);

using SelT = decltype(sel);
return hictkpy::PixelSelector(std::make_shared<const SelT>(std::move(sel)), count_type,
Expand Down Expand Up @@ -307,9 +311,9 @@ void declare_file_class(nb::module_ &m) {
file.def("attributes", &file::attributes, "Get file attributes as a dictionary.",
nb::rv_policy::take_ownership);

file.def("fetch", &file::fetch, nb::keep_alive<0, 1>(), nb::arg("range1") = "",
nb::arg("range2") = "", nb::arg("normalization") = "NONE", nb::arg("count_type") = "int",
nb::arg("join") = false, nb::arg("query_type") = "UCSC",
file.def("fetch", &file::fetch, nb::keep_alive<0, 1>(), nb::arg("range1") = nb::none(),
nb::arg("range2") = nb::none(), nb::arg("normalization") = nb::none(),
nb::arg("count_type") = "int", nb::arg("join") = false, nb::arg("query_type") = "UCSC",
"Fetch interactions overlapping a region of interest.", nb::rv_policy::move);

file.def("avail_normalizations", &file::avail_normalizations,
Expand Down
22 changes: 17 additions & 5 deletions src/hic_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <hictk/file.hpp>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
#include <stdexcept>
Expand Down Expand Up @@ -73,7 +74,7 @@ HiCFileWriter::HiCFileWriter(const std::filesystem::path &path_, const hictkpy::
assembly, n_threads, chunk_size, tmpdir, compression_lvl,
skip_all_vs_all_matrix) {}

void HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) {
hictk::File HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) {
if (_finalized) {
throw std::runtime_error(
fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _w.path()));
Expand All @@ -93,14 +94,25 @@ void HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) {
}
SPDLOG_INFO(FMT_STRING("successfully finalized \"{}\"!"), _w.path());
spdlog::default_logger()->set_level(previous_lvl);

return hictk::File{std::string{_w.path()}, _w.resolutions().front()};
}

std::filesystem::path HiCFileWriter::path() const noexcept {
return std::filesystem::path{_w.path()};
}

const std::vector<std::uint32_t> &HiCFileWriter::resolutions() const noexcept {
return _w.resolutions();
auto HiCFileWriter::resolutions() const {
using WeightVector = nb::ndarray<nb::numpy, nb::shape<-1>, nb::c_contig, std::uint32_t>;

// NOLINTNEXTLINE
auto *resolutions_ptr = new std::vector<std::uint32_t>(_w.resolutions());

auto capsule = nb::capsule(resolutions_ptr, [](void *vect_ptr) noexcept {
delete reinterpret_cast<std::vector<std::uint32_t> *>(vect_ptr); // NOLINT
});

return WeightVector{resolutions_ptr->data(), {resolutions_ptr->size()}, capsule};
}

const hictk::Reference &HiCFileWriter::chromosomes() const { return _w.chromosomes(); }
Expand Down Expand Up @@ -171,7 +183,7 @@ void HiCFileWriter::bind(nb::module_ &m) {

writer.def("path", &hictkpy::HiCFileWriter::path, "Get the file path.", nb::rv_policy::move);
writer.def("resolutions", &hictkpy::HiCFileWriter::resolutions,
"Get the list of resolutions in bp.", nb::rv_policy::move);
"Get the list of resolutions in bp.", nb::rv_policy::take_ownership);
writer.def("chromosomes", &get_chromosomes_from_object<hictkpy::HiCFileWriter>,
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.",
Expand All @@ -185,7 +197,7 @@ void HiCFileWriter::bind(nb::module_ &m) {
"either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, "
"chrom2, start2, end2, count].");
writer.def("finalize", &hictkpy::HiCFileWriter::finalize, nb::arg("log_lvl") = "WARN",
"Write interactions to file.");
"Write interactions to file.", nb::rv_policy::move);
}

} // namespace hictkpy
4 changes: 3 additions & 1 deletion src/include/hictkpy/cooler_file_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <cstdint>
#include <filesystem>
#include <hictk/cooler/singlecell_cooler.hpp>
#include <hictk/file.hpp>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
#include <optional>
Expand Down Expand Up @@ -43,7 +44,8 @@ class CoolerFileWriter {

void add_pixels(const nanobind::object& df);

void finalize(std::string_view log_lvl_str, std::size_t chunk_size, std::size_t update_frequency);
[[nodiscard]] hictk::File finalize(std::string_view log_lvl_str, std::size_t chunk_size,
std::size_t update_frequency);

[[nodiscard]] std::string repr() const;
static void bind(nanobind::module_& m);
Expand Down
43 changes: 0 additions & 43 deletions src/include/hictkpy/dynamic_1d_array.hpp

This file was deleted.

5 changes: 3 additions & 2 deletions src/include/hictkpy/hic_file_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <hictk/file.hpp>
#include <hictk/hic/file_writer.hpp>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
Expand Down Expand Up @@ -40,14 +41,14 @@ class HiCFileWriter {
bool skip_all_vs_all_matrix);

[[nodiscard]] std::filesystem::path path() const noexcept;
[[nodiscard]] const std::vector<std::uint32_t>& resolutions() const noexcept;
[[nodiscard]] auto resolutions() const;

[[nodiscard]] const hictk::Reference& chromosomes() const;
[[nodiscard]] hictkpy::BinTable bins(std::uint32_t resolution) const;

void add_pixels(const nanobind::object& df);

void finalize(std::string_view log_lvl_str);
[[nodiscard]] hictk::File finalize(std::string_view log_lvl_str);

[[nodiscard]] std::string repr() const;

Expand Down
81 changes: 0 additions & 81 deletions src/include/hictkpy/impl/dynamic_1d_array_impl.hpp

This file was deleted.

20 changes: 17 additions & 3 deletions src/multires_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <hictk/cooler/validation.hpp>
#include <hictk/multires_file.hpp>
#include <string>
#include <vector>

#include "hictkpy/nanobind.hpp"
#include "hictkpy/reference.hpp"
Expand All @@ -27,6 +28,19 @@ static std::string repr(const hictk::MultiResFile& mrf) {

static std::filesystem::path get_path(const hictk::MultiResFile& mrf) { return mrf.path(); }

[[nodiscard]] static auto get_resolutions(const hictk::MultiResFile& f) {
using WeightVector = nb::ndarray<nb::numpy, nb::shape<-1>, nb::c_contig, std::uint32_t>;

// NOLINTNEXTLINE
auto* resolutions_ptr = new std::vector<std::uint32_t>(f.resolutions());

auto capsule = nb::capsule(resolutions_ptr, [](void* vect_ptr) noexcept {
delete reinterpret_cast<std::vector<std::uint32_t>*>(vect_ptr); // NOLINT
});

return WeightVector{resolutions_ptr->data(), {resolutions_ptr->size()}, capsule};
}

static nb::dict get_attrs(const hictk::hic::File& hf) {
nb::dict py_attrs;

Expand Down Expand Up @@ -59,7 +73,7 @@ static nb::dict get_attrs(const hictk::cooler::MultiResFile& mclr) {
static nb::dict attributes(const hictk::MultiResFile& f) {
auto attrs = f.is_hic() ? get_attrs(f.open(f.resolutions().front()).get<hictk::hic::File>())
: get_attrs(hictk::cooler::MultiResFile{f.path()});
attrs["resolutions"] = f.resolutions();
attrs["resolutions"] = get_resolutions(f);

return attrs;
}
Expand All @@ -84,8 +98,8 @@ void declare_multires_file_class(nb::module_& m) {
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);
mres_file.def("resolutions", &hictk::MultiResFile::resolutions,
"Get the list of available resolutions.", nb::rv_policy::copy);
mres_file.def("resolutions", &get_resolutions, "Get the list of available resolutions.",
nb::rv_policy::take_ownership);
mres_file.def("attributes", &multires_file::attributes, "Get file attributes as a dictionary.",
nb::rv_policy::take_ownership);
mres_file.def("__getitem__", &hictk::MultiResFile::open,
Expand Down
Loading

0 comments on commit 1fd4c6d

Please sign in to comment.