Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make API more ergonomic #123

Merged
merged 10 commits into from
Nov 13, 2024
9 changes: 6 additions & 3 deletions src/cooler_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <cstdint>
#include <filesystem>
#include <hictk/cooler/cooler.hpp>
#include <hictk/file.hpp>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
#include <hictk/type_traits.hpp>
Expand Down Expand Up @@ -113,8 +114,8 @@ void CoolerFileWriter::add_pixels(const nb::object &df) {
var);
}

void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str,
std::size_t chunk_size, std::size_t update_freq) {
hictk::File CoolerFileWriter::finalize(std::string_view log_lvl_str, std::size_t chunk_size,
std::size_t update_freq) {
if (_finalized) {
throw std::runtime_error(
fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _path));
Expand Down Expand Up @@ -151,6 +152,8 @@ void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str,
_w.reset();
std::filesystem::remove(sclr_path); // NOLINT
// NOLINTEND(*-unchecked-optional-access)

return hictk::File{_path.string()};
}

hictk::cooler::SingleCellFile CoolerFileWriter::create_file(std::string_view path,
Expand Down Expand Up @@ -212,7 +215,7 @@ void CoolerFileWriter::bind(nb::module_ &m) {
// NOLINTBEGIN(*-avoid-magic-numbers)
writer.def("finalize", &hictkpy::CoolerFileWriter::finalize, nb::arg("log_lvl") = "WARN",
nb::arg("chunk_size") = 500'000, nb::arg("update_frequency") = 10'000'000,
"Write interactions to file.");
"Write interactions to file.", nb::rv_policy::move);
// NOLINTEND(*-avoid-magic-numbers)
}
} // namespace hictkpy
34 changes: 19 additions & 15 deletions src/file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,9 @@ bool is_cooler(const std::filesystem::path &uri) {

bool is_hic(const std::filesystem::path &uri) { return hictk::hic::utils::is_hic_file(uri); }

static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range1,
std::string_view range2, std::string_view normalization,
static hictkpy::PixelSelector fetch(const hictk::File &f, std::optional<std::string_view> range1,
std::optional<std::string_view> range2,
std::optional<std::string_view> normalization,
std::string_view count_type, bool join,
std::string_view query_type) {
if (count_type != "float" && count_type != "int") {
Expand All @@ -96,36 +97,39 @@ static hictkpy::PixelSelector fetch(const hictk::File &f, std::string_view range
throw std::runtime_error("query_type should be either UCSC or BED");
}

if (normalization != "NONE") {
const hictk::balancing::Method normalization_method{normalization.value_or("NONE")};

if (normalization_method != hictk::balancing::Method::NONE()) {
count_type = "float";
}

if (range1.empty()) {
assert(range2.empty());
if (!range1.has_value() || range1->empty()) {
assert(!range2.has_value() || range2->empty());
return std::visit(
[&](const auto &ff) {
auto sel = ff.fetch(hictk::balancing::Method{normalization});
auto sel = ff.fetch(normalization_method);
using SelT = decltype(sel);
return hictkpy::PixelSelector(std::make_shared<const SelT>(std::move(sel)), count_type,
join);
},
f.get());
}

if (range2.empty()) {
if (!range2.has_value() || range2->empty()) {
range2 = range1;
}

const auto query_type_ =
query_type == "UCSC" ? hictk::GenomicInterval::Type::UCSC : hictk::GenomicInterval::Type::BED;
const auto gi1 = hictk::GenomicInterval::parse(f.chromosomes(), std::string{range1}, query_type_);
const auto gi2 = hictk::GenomicInterval::parse(f.chromosomes(), std::string{range2}, query_type_);
const auto gi1 =
hictk::GenomicInterval::parse(f.chromosomes(), std::string{*range1}, query_type_);
const auto gi2 =
hictk::GenomicInterval::parse(f.chromosomes(), std::string{*range2}, query_type_);

return std::visit(
[&](const auto &ff) {
// Workaround bug fixed in https://github.com/paulsengroup/hictk/pull/158
auto sel = ff.fetch(fmt::format(FMT_STRING("{}"), gi1), fmt::format(FMT_STRING("{}"), gi2),
hictk::balancing::Method(normalization));
auto sel = ff.fetch(gi1.chrom().name(), gi1.start(), gi1.end(), gi2.chrom().name(),
gi2.start(), gi2.end(), normalization_method);

using SelT = decltype(sel);
return hictkpy::PixelSelector(std::make_shared<const SelT>(std::move(sel)), count_type,
Expand Down Expand Up @@ -307,9 +311,9 @@ void declare_file_class(nb::module_ &m) {
file.def("attributes", &file::attributes, "Get file attributes as a dictionary.",
nb::rv_policy::take_ownership);

file.def("fetch", &file::fetch, nb::keep_alive<0, 1>(), nb::arg("range1") = "",
nb::arg("range2") = "", nb::arg("normalization") = "NONE", nb::arg("count_type") = "int",
nb::arg("join") = false, nb::arg("query_type") = "UCSC",
file.def("fetch", &file::fetch, nb::keep_alive<0, 1>(), nb::arg("range1") = nb::none(),
nb::arg("range2") = nb::none(), nb::arg("normalization") = nb::none(),
nb::arg("count_type") = "int", nb::arg("join") = false, nb::arg("query_type") = "UCSC",
"Fetch interactions overlapping a region of interest.", nb::rv_policy::move);

file.def("avail_normalizations", &file::avail_normalizations,
Expand Down
22 changes: 17 additions & 5 deletions src/hic_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <hictk/file.hpp>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
#include <stdexcept>
Expand Down Expand Up @@ -73,7 +74,7 @@ HiCFileWriter::HiCFileWriter(const std::filesystem::path &path_, const hictkpy::
assembly, n_threads, chunk_size, tmpdir, compression_lvl,
skip_all_vs_all_matrix) {}

void HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) {
hictk::File HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) {
if (_finalized) {
throw std::runtime_error(
fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _w.path()));
Expand All @@ -93,14 +94,25 @@ void HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) {
}
SPDLOG_INFO(FMT_STRING("successfully finalized \"{}\"!"), _w.path());
spdlog::default_logger()->set_level(previous_lvl);

return hictk::File{std::string{_w.path()}, _w.resolutions().front()};
}

std::filesystem::path HiCFileWriter::path() const noexcept {
return std::filesystem::path{_w.path()};
}

const std::vector<std::uint32_t> &HiCFileWriter::resolutions() const noexcept {
return _w.resolutions();
auto HiCFileWriter::resolutions() const {
using WeightVector = nb::ndarray<nb::numpy, nb::shape<-1>, nb::c_contig, std::uint32_t>;

// NOLINTNEXTLINE
auto *resolutions_ptr = new std::vector<std::uint32_t>(_w.resolutions());

auto capsule = nb::capsule(resolutions_ptr, [](void *vect_ptr) noexcept {
delete reinterpret_cast<std::vector<std::uint32_t> *>(vect_ptr); // NOLINT
});

return WeightVector{resolutions_ptr->data(), {resolutions_ptr->size()}, capsule};
}

const hictk::Reference &HiCFileWriter::chromosomes() const { return _w.chromosomes(); }
Expand Down Expand Up @@ -171,7 +183,7 @@ void HiCFileWriter::bind(nb::module_ &m) {

writer.def("path", &hictkpy::HiCFileWriter::path, "Get the file path.", nb::rv_policy::move);
writer.def("resolutions", &hictkpy::HiCFileWriter::resolutions,
"Get the list of resolutions in bp.", nb::rv_policy::move);
"Get the list of resolutions in bp.", nb::rv_policy::take_ownership);
writer.def("chromosomes", &get_chromosomes_from_object<hictkpy::HiCFileWriter>,
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.",
Expand All @@ -185,7 +197,7 @@ void HiCFileWriter::bind(nb::module_ &m) {
"either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, "
"chrom2, start2, end2, count].");
writer.def("finalize", &hictkpy::HiCFileWriter::finalize, nb::arg("log_lvl") = "WARN",
"Write interactions to file.");
"Write interactions to file.", nb::rv_policy::move);
}

} // namespace hictkpy
4 changes: 3 additions & 1 deletion src/include/hictkpy/cooler_file_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <cstdint>
#include <filesystem>
#include <hictk/cooler/singlecell_cooler.hpp>
#include <hictk/file.hpp>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
#include <optional>
Expand Down Expand Up @@ -43,7 +44,8 @@ class CoolerFileWriter {

void add_pixels(const nanobind::object& df);

void finalize(std::string_view log_lvl_str, std::size_t chunk_size, std::size_t update_frequency);
[[nodiscard]] hictk::File finalize(std::string_view log_lvl_str, std::size_t chunk_size,
std::size_t update_frequency);

[[nodiscard]] std::string repr() const;
static void bind(nanobind::module_& m);
Expand Down
43 changes: 0 additions & 43 deletions src/include/hictkpy/dynamic_1d_array.hpp

This file was deleted.

5 changes: 3 additions & 2 deletions src/include/hictkpy/hic_file_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <hictk/file.hpp>
#include <hictk/hic/file_writer.hpp>
#include <hictk/reference.hpp>
#include <hictk/tmpdir.hpp>
Expand Down Expand Up @@ -40,14 +41,14 @@ class HiCFileWriter {
bool skip_all_vs_all_matrix);

[[nodiscard]] std::filesystem::path path() const noexcept;
[[nodiscard]] const std::vector<std::uint32_t>& resolutions() const noexcept;
[[nodiscard]] auto resolutions() const;

[[nodiscard]] const hictk::Reference& chromosomes() const;
[[nodiscard]] hictkpy::BinTable bins(std::uint32_t resolution) const;

void add_pixels(const nanobind::object& df);

void finalize(std::string_view log_lvl_str);
[[nodiscard]] hictk::File finalize(std::string_view log_lvl_str);

[[nodiscard]] std::string repr() const;

Expand Down
81 changes: 0 additions & 81 deletions src/include/hictkpy/impl/dynamic_1d_array_impl.hpp

This file was deleted.

20 changes: 17 additions & 3 deletions src/multires_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <hictk/cooler/validation.hpp>
#include <hictk/multires_file.hpp>
#include <string>
#include <vector>

#include "hictkpy/nanobind.hpp"
#include "hictkpy/reference.hpp"
Expand All @@ -27,6 +28,19 @@ static std::string repr(const hictk::MultiResFile& mrf) {

static std::filesystem::path get_path(const hictk::MultiResFile& mrf) { return mrf.path(); }

[[nodiscard]] static auto get_resolutions(const hictk::MultiResFile& f) {
using WeightVector = nb::ndarray<nb::numpy, nb::shape<-1>, nb::c_contig, std::uint32_t>;

// NOLINTNEXTLINE
auto* resolutions_ptr = new std::vector<std::uint32_t>(f.resolutions());

auto capsule = nb::capsule(resolutions_ptr, [](void* vect_ptr) noexcept {
delete reinterpret_cast<std::vector<std::uint32_t>*>(vect_ptr); // NOLINT
});

return WeightVector{resolutions_ptr->data(), {resolutions_ptr->size()}, capsule};
}

static nb::dict get_attrs(const hictk::hic::File& hf) {
nb::dict py_attrs;

Expand Down Expand Up @@ -59,7 +73,7 @@ static nb::dict get_attrs(const hictk::cooler::MultiResFile& mclr) {
static nb::dict attributes(const hictk::MultiResFile& f) {
auto attrs = f.is_hic() ? get_attrs(f.open(f.resolutions().front()).get<hictk::hic::File>())
: get_attrs(hictk::cooler::MultiResFile{f.path()});
attrs["resolutions"] = f.resolutions();
attrs["resolutions"] = get_resolutions(f);

return attrs;
}
Expand All @@ -84,8 +98,8 @@ void declare_multires_file_class(nb::module_& m) {
nb::arg("include_ALL") = false,
"Get chromosomes sizes as a dictionary mapping names to sizes.",
nb::rv_policy::take_ownership);
mres_file.def("resolutions", &hictk::MultiResFile::resolutions,
"Get the list of available resolutions.", nb::rv_policy::copy);
mres_file.def("resolutions", &get_resolutions, "Get the list of available resolutions.",
nb::rv_policy::take_ownership);
mres_file.def("attributes", &multires_file::attributes, "Get file attributes as a dictionary.",
nb::rv_policy::take_ownership);
mres_file.def("__getitem__", &hictk::MultiResFile::open,
Expand Down
Loading
Loading