diff --git a/include/sdsl/bit_vectors.hpp b/include/sdsl/bit_vectors.hpp index b3b2b7174..fe35a2390 100644 --- a/include/sdsl/bit_vectors.hpp +++ b/include/sdsl/bit_vectors.hpp @@ -13,5 +13,6 @@ #include "rrr_vector.hpp" #include "sd_vector.hpp" #include "hyb_vector.hpp" +#include "hyb_sd_vector.hpp" #endif diff --git a/include/sdsl/bits.hpp b/include/sdsl/bits.hpp index 4e152faa3..94caa6cb2 100644 --- a/include/sdsl/bits.hpp +++ b/include/sdsl/bits.hpp @@ -11,12 +11,15 @@ #include // for uint64_t uint32_t declaration #include // for cerr #include -#ifdef __SSE4_2__ + +// clang-format off +#if 1 #include #endif -#ifdef __BMI2__ +#if 0 #include #endif +// clang-format on #ifdef WIN32 #include "iso646.h" @@ -490,9 +493,11 @@ struct bits_impl { template inline uint64_t bits_impl::cnt(uint64_t x) { -#ifdef __SSE4_2__ +// clang-format off +#if 1 return __builtin_popcountll(x); #else +// clang-format on #ifdef POPCOUNT_TL return lt_cnt[x & 0xFFULL] + lt_cnt[(x >> 8) & 0xFFULL] + lt_cnt[(x >> 16) & 0xFFULL] + lt_cnt[(x >> 24) & 0xFFULL] + lt_cnt[(x >> 32) & 0xFFULL] + lt_cnt[(x >> 40) & 0xFFULL] + @@ -579,11 +584,13 @@ inline uint64_t bits_impl::map01(uint64_t x, uint64_t c) template inline uint32_t bits_impl::sel(uint64_t x, uint32_t i) { -#ifdef __BMI2__ +// clang-format off +#if 0 // taken from folly return _tzcnt_u64(_pdep_u64(1ULL << (i - 1), x)); #endif -#ifdef __SSE4_2__ +#if 1 + // clang-format on uint64_t s = x, b; s = s - ((s >> 1) & 0x5555555555555555ULL); s = (s & 0x3333333333333333ULL) + ((s >> 2) & 0x3333333333333333ULL); @@ -653,10 +660,12 @@ inline uint32_t bits_impl::_sel(uint64_t x, uint32_t i) template inline uint32_t bits_impl::hi(uint64_t x) { -#ifdef __SSE4_2__ +// clang-format off +#if 1 if (x == 0) return 0; return 63 - __builtin_clzll(x); #else + // clang-format on uint64_t t, tt; // temporaries if ((tt = x >> 32)) { // hi >= 32 if ((t = tt >> 16)) { // hi >= 48 @@ -679,10 +688,12 @@ inline uint32_t bits_impl::hi(uint64_t x) template inline 
uint32_t bits_impl::lo(uint64_t x) { -#ifdef __SSE4_2__ +// clang-format off +#if 1 if (x == 0) return 0; return __builtin_ctzll(x); #else + // clang-format on if (x & 1) return 0; if (x & 3) return 1; if (x & 7) return 2; diff --git a/include/sdsl/coder_elias_delta.hpp b/include/sdsl/coder_elias_delta.hpp index c6cbedf34..7f28d2144 100644 --- a/include/sdsl/coder_elias_delta.hpp +++ b/include/sdsl/coder_elias_delta.hpp @@ -140,6 +140,18 @@ class elias_delta { { return v.m_data; } + + static uint64_t decode(const uint64_t*& z, uint8_t& offset) + { + size_type len_1_len; + len_1_len = bits::read_unary_and_move(z, offset); // read length of length of x + if (!len_1_len) { + return 1ULL; + } + size_type len = bits::read_int_and_move(z, offset, len_1_len) + (1ULL << len_1_len); + return bits::read_int_and_move(z, offset, len-1) + (len-1<64) * (1ULL << (len-1)); + } + }; // \sa coder::elias_delta::encoding_length diff --git a/include/sdsl/config.hpp b/include/sdsl/config.hpp index bd45b9c87..c2a59fda8 100644 --- a/include/sdsl/config.hpp +++ b/include/sdsl/config.hpp @@ -124,6 +124,31 @@ using key_text_trait = key_text_trait_impl; template using key_bwt_trait = key_bwt_trait_impl; +template +constexpr const char* key_text() +{ + return conf::KEY_TEXT_INT; +} + +template +constexpr const char* key_bwt() +{ + return conf::KEY_BWT_INT; +} + + +template<> +inline constexpr const char* key_text<8>() +{ + return conf::KEY_TEXT; +} + +template<> +inline constexpr const char* key_bwt<8>() +{ + return conf::KEY_BWT; +} + } #endif diff --git a/include/sdsl/construct_sa.hpp b/include/sdsl/construct_sa.hpp index fde397d5f..1b57538b9 100644 --- a/include/sdsl/construct_sa.hpp +++ b/include/sdsl/construct_sa.hpp @@ -11,6 +11,7 @@ #include "config.hpp" #include "int_vector.hpp" +#include "bits.hpp" #include "divsufsort.h" #include "divsufsort64.h" diff --git a/include/sdsl/csa_alphabet_strategy.hpp b/include/sdsl/csa_alphabet_strategy.hpp index e3eb42369..b571a8ae1 100644 --- 
a/include/sdsl/csa_alphabet_strategy.hpp +++ b/include/sdsl/csa_alphabet_strategy.hpp @@ -55,37 +55,20 @@ template > class succinct_byte_alphabet; -template , - class rank_support_type = typename bit_vector_type::rank_1_type, - class select_support_type = typename bit_vector_type::select_1_type, - class C_array_type = int_vector<>> +template, + typename select_support_type = select_support_scan<> + > +class succinct_multibyte_alphabet; + +template, + class rank_support_type = typename bit_vector_type::rank_1_type, + class select_support_type = typename bit_vector_type::select_1_type, + class C_array_type = int_vector<> + > class int_alphabet; -template -constexpr const char* key_text() -{ - return conf::KEY_TEXT_INT; -} - -template -constexpr const char* key_bwt() -{ - return conf::KEY_BWT_INT; -} - - -template<> -inline constexpr const char* key_text<8>() -{ - return conf::KEY_TEXT; -} - -template<> -inline constexpr const char* key_bwt<8>() -{ - return conf::KEY_BWT; -} - template struct alphabet_trait { typedef byte_alphabet type; @@ -253,6 +236,73 @@ class byte_alphabet { }; +//! Helper class for the char2comp mapping +template +class char2comp_wrapper +{ + private: + const t_alphabet_strat* m_strat; + public: + using comp_char_type = typename t_alphabet_strat::comp_char_type; + using char_type = typename t_alphabet_strat::char_type; + using size_type = typename t_alphabet_strat::size_type; + + char2comp_wrapper(const t_alphabet_strat* strat) : m_strat(strat) {} + + comp_char_type operator[](char_type c) const // TODO: using a const reference??? 
+ { + if (c >= m_strat->m_char.size() or !m_strat->m_char[c]) + return (comp_char_type)0; + return (comp_char_type) m_strat->m_char_rank((size_type)c); + } + + template + typename std::enable_if<(t_strat::q>1), typename t_strat::multi_comp_char_type>::type + operator[](const typename std::enable_if<(t_strat::q>1), typename t_strat::multi_char_type>::type& c) const + { + typename t_strat::multi_comp_char_type x {0}; + auto sigma_size = m_strat->sigma; + for (size_t i=0; i < t_alphabet_strat::q; ++i) { + if (c >= m_strat->m_char.size() or !m_strat->m_char[c]) + return 0ULL; + x *= m_strat->sigma_q; + x += m_strat->m_char_rank((size_type)c); + } + return x; + } + +}; + +//! Helper class for the comp2char mapping +template +class comp2char_wrapper +{ + private: + const t_alphabet_strat* m_strat; + public: + using char_type = typename t_alphabet_strat::char_type; + using comp_char_type = typename t_alphabet_strat::comp_char_type; + using size_type = typename t_alphabet_strat::size_type; + + comp2char_wrapper(const t_alphabet_strat* strat) : m_strat(strat) {} + + char_type operator[](comp_char_type c) const // TODO: using a const reference??? 
+ { + return (char_type) m_strat->m_char_select(((size_type)c)+1); + } + + template + typename std::enable_if<(t_strat::q>1), typename t_strat::multi_char_type>::type + operator[](typename std::enable_if<(t_strat::q>1), typename t_strat::multi_comp_char_type>::type c) const + { + std::cout<<"TODO comp2char multi_byte x="<(c)<<" t_alphabet_strat::q="<<(int)t_alphabet_strat::q< -class succinct_byte_alphabet { -public: - class char2comp_wrapper; - class comp2char_wrapper; - friend class char2comp_wrapper; - friend class comp2char_wrapper; - - typedef int_vector<>::size_type size_type; - typedef char2comp_wrapper char2comp_type; - typedef comp2char_wrapper comp2char_type; - typedef C_array_type C_type; - typedef uint16_t sigma_type; - typedef uint8_t char_type; - typedef uint8_t comp_char_type; - typedef std::string string_type; - typedef byte_alphabet_tag alphabet_category; - enum { int_width = 8 }; - - //! Helper class for the char2comp mapping - class char2comp_wrapper { - private: - const succinct_byte_alphabet* m_strat; - - public: - char2comp_wrapper(const succinct_byte_alphabet* strat) : m_strat(strat) {} - comp_char_type operator[](char_type c) const - { - if (c >= m_strat->m_char.size() or !m_strat->m_char[c]) return (comp_char_type)0; - return (comp_char_type)m_strat->m_char_rank((size_type)c); - } - }; - - //! 
Helper class for the comp2char mapping - class comp2char_wrapper { - private: - const succinct_byte_alphabet* m_strat; - - public: - comp2char_wrapper(const succinct_byte_alphabet* strat) : m_strat(strat) {} - char_type operator[](comp_char_type c) const - { - return (char_type)m_strat->m_char_select(((size_type)c) + 1); - } - }; - - const char2comp_type char2comp; - const comp2char_type comp2char; - const C_type& C; - const sigma_type& sigma; - -private: - bit_vector_type m_char; // `m_char[i]` indicates if character with code i is present or not - rank_support_type m_char_rank; // rank data structure for `m_char` to answer char2comp - select_support_type m_char_select; // select data structure for `m_char` to answer comp2char - C_type m_C; // cumulative counts for the compact alphabet [0..sigma] - sigma_type m_sigma; // effective size of the alphabet - -public: - //! Default constructor - succinct_byte_alphabet() : char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), m_sigma(0) - { - } +template +class succinct_byte_alphabet +{ + public: + static constexpr uint8_t q = 1; + + friend class char2comp_wrapper; + friend class comp2char_wrapper; + typedef char2comp_wrapper char2comp_type; + typedef comp2char_wrapper comp2char_type; + + typedef int_vector<>::size_type size_type; + typedef C_array_type C_type; + typedef uint16_t sigma_type; + typedef sigma_type multi_sigma_type; + typedef uint8_t char_type; + typedef uint8_t comp_char_type; + typedef std::string string_type; + typedef byte_alphabet_tag alphabet_category; + enum { int_width = 8 }; + + private: + bit_vector_type m_char; // `m_char[i]` indicates if character with code i is present or not + rank_support_type m_char_rank; // rank data structure for `m_char` to answer char2comp + select_support_type m_char_select; // select data structure for `m_char` to answer comp2char + C_type m_C; // cumulative counts for the compact alphabet [0..sigma] + sigma_type m_sigma; // effective size of the alphabet + + void 
copy(const succinct_byte_alphabet& strat) + { + m_char = strat.m_char; + m_char_rank = strat.m_char_rank; + m_char_rank.set_vector(&m_char); + m_char_select = strat.m_char_select; + m_char_select.set_vector(&m_char); + m_C = strat.m_C; + m_sigma = strat.m_sigma; + } + public: + + const char2comp_type char2comp; + const comp2char_type comp2char; + const C_type& C; + const sigma_type& sigma; + const multi_sigma_type& sigma_q; + + //! Default constructor + succinct_byte_alphabet() : char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), sigma_q(m_sigma) + { + m_sigma = 0; + } - //! Construct from a byte-stream - /*! + //! Construct from a byte-stream + /*! * \param text_buf Byte stream. * \param len Length of the byte stream. */ - succinct_byte_alphabet(int_vector_buffer<8>& text_buf, int_vector_size_type len) - : char2comp(this), comp2char(this), C(m_C), sigma(m_sigma) - { - m_sigma = 0; - if (0 == len or 0 == text_buf.size()) return; - assert(len <= text_buf.size()); - // initialize vectors - int_vector<64> D(257, 0); - bit_vector tmp_char(256, 0); - // count occurrences of each symbol - for (size_type i = 0; i < len; ++i) { - ++D[text_buf[i]]; - } - assert(1 == D[0]); // null-byte should occur exactly once - m_sigma = 0; - for (int i = 0; i < 256; ++i) - if (D[i]) { - tmp_char[i] = 1; // mark occurring character - D[m_sigma] = D[i]; // compactify m_C - ++m_sigma; - } - // resize to sigma+1, since CSAs also need the sum of all elements - m_C = C_type(m_sigma + 1, 0, bits::hi(len) + 1); - - for (int i = (int)m_sigma; i > 0; --i) - m_C[i] = D[i - 1]; - m_C[0] = 0; - for (int i = 1; i <= (int)m_sigma; ++i) - m_C[i] = m_C[i] + m_C[i - 1]; - assert(m_C[sigma] == len); - m_char = tmp_char; - util::init_support(m_char_rank, &m_char); - util::init_support(m_char_select, &m_char); - } + succinct_byte_alphabet(int_vector_buffer<8>& text_buf, int_vector_size_type len): + char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), sigma_q(m_sigma) + { + m_sigma = 0; + if (0 == 
len or 0 == text_buf.size()) + return; + assert(len <= text_buf.size()); + // initialize vectors + int_vector<64> D(257, 0); + bit_vector tmp_char(256, 0); + // count occurrences of each symbol + for (size_type i=0; i < len; ++i) { + ++D[text_buf[i]]; + } + assert(1 == D[0]); // null-byte should occur exactly once + m_sigma = 0; + for (int i=0; i<256; ++i) + if (D[i]) { + tmp_char[i] = 1; // mark occurring character + D[m_sigma] = D[i]; // compactify m_C + ++m_sigma; + } + // resize to sigma+1, since CSAs also need the sum of all elements + m_C = C_type(m_sigma+1, 0, bits::hi(len)+1); + + for (int i=(int)m_sigma; i > 0; --i) m_C[i] = D[i-1]; + m_C[0] = 0; + for (int i=1; i <= (int)m_sigma; ++i) m_C[i] = m_C[i] + m_C[i-1]; + assert(m_C[sigma]==len); + m_char = tmp_char; + util::init_support(m_char_rank, &m_char); + util::init_support(m_char_select, &m_char); + } - //! Copy constructor - succinct_byte_alphabet(const succinct_byte_alphabet& strat) - : char2comp(this) - , comp2char(this) - , C(m_C) - , sigma(m_sigma) - , m_char(strat.m_char) - , m_char_rank(strat.m_char_rank) - , m_char_select(strat.m_char_select) - , m_C(strat.m_C) - , m_sigma(strat.m_sigma) - { - m_char_rank.set_vector(&m_char); - m_char_select.set_vector(&m_char); - } + //! Copy constructor + succinct_byte_alphabet(const succinct_byte_alphabet& strat): char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), sigma_q(m_sigma) + { + copy(strat); + } - //! Move constructor - succinct_byte_alphabet(succinct_byte_alphabet&& strat) - : char2comp(this) - , comp2char(this) - , C(m_C) - , sigma(m_sigma) - , m_char(std::move(strat.m_char)) - , m_char_rank(std::move(strat.m_char_rank)) - , m_char_select(std::move(strat.m_char_select)) - , m_C(std::move(strat.m_C)) - , m_sigma(std::move(strat.m_sigma)) - { - m_char_rank.set_vector(&m_char); - m_char_select.set_vector(&m_char); - } + //! 
Move constructor + succinct_byte_alphabet(succinct_byte_alphabet&& strat) + { + *this = std::move(strat); + } + succinct_byte_alphabet& operator=(const succinct_byte_alphabet& strat) + { + if (this != &strat) { + copy(strat); + } + return *this; + } - succinct_byte_alphabet& operator=(const succinct_byte_alphabet& strat) - { - if (this != &strat) { - succinct_byte_alphabet tmp(strat); - *this = std::move(tmp); - } - return *this; - } + succinct_byte_alphabet& operator=(succinct_byte_alphabet&& strat) + { + if (this != &strat) { + m_char = std::move(strat.m_char); + m_char_rank = std::move(strat.m_char_rank); + m_char_rank.set_vector(&m_char); + m_char_select = std::move(strat.m_char_select); + m_char_select.set_vector(&m_char); + m_C = std::move(strat.m_C); + m_sigma = std::move(strat.m_sigma); + } + return *this; + } - succinct_byte_alphabet& operator=(succinct_byte_alphabet&& strat) - { - if (this != &strat) { - m_char = std::move(strat.m_char); - m_char_rank = std::move(strat.m_char_rank); - m_char_rank.set_vector(&m_char); - m_char_select = std::move(strat.m_char_select); - m_char_select.set_vector(&m_char); - m_C = std::move(strat.m_C); - m_sigma = std::move(strat.m_sigma); - } - return *this; - } + //! Serialize method + size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const + { + structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_char.serialize(out, child, "m_char"); + written_bytes += m_char_rank.serialize(out, child, "m_char_rank"); + written_bytes += m_char_select.serialize(out, child, "m_char_select"); + written_bytes += m_C.serialize(out, child, "m_C"); + written_bytes += write_member(m_sigma, out, child, "m_sigma"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } - //! 
Serialize method - size_type - serialize(std::ostream& out, structure_tree_node* v = nullptr, std::string name = "") const - { - structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); - size_type written_bytes = 0; - written_bytes += m_char.serialize(out, child, "m_char"); - written_bytes += m_char_rank.serialize(out, child, "m_char_rank"); - written_bytes += m_char_select.serialize(out, child, "m_char_select"); - written_bytes += m_C.serialize(out, child, "m_C"); - written_bytes += write_member(m_sigma, out, child, "m_sigma"); - structure_tree::add_size(child, written_bytes); - return written_bytes; - } + //! Load method + void load(std::istream& in) + { + m_char.load(in); + m_char_rank.load(in); + m_char_rank.set_vector(&m_char); + m_char_select.load(in); + m_char_select.set_vector(&m_char); + m_C.load(in); + read_member(m_sigma, in); + } +}; - //! Load method - void load(std::istream& in) - { - m_char.load(in); - m_char_rank.load(in); - m_char_rank.set_vector(&m_char); - m_char_select.load(in); - m_char_select.set_vector(&m_char); - m_C.load(in); - read_member(m_sigma, in); - } +class multibyte_tag; +class multibyte_comp_char; + +class multibyte_comp_char +{ + private: + uint64_t m_x; // value + public: + typedef multibyte_tag type; + + template + friend bool cyclic_insert_hi(typename t_alphabet_strat::multi_comp_char_type&, + typename t_alphabet_strat::char_type, + const t_alphabet_strat&); + + template + friend bool cyclic_insert_lo(typename t_alphabet_strat::multi_comp_char_type&, + typename t_alphabet_strat::char_type, + const t_alphabet_strat&); + + multibyte_comp_char(uint64_t x) : m_x(x) {} + + explicit operator uint64_t() const + { + return m_x; + } + + multibyte_comp_char operator+(uint64_t add)const + { + return multibyte_comp_char(m_x+add); + } }; -template -void init_char_bitvector(bit_vector_type& char_bv, const std::map &D) { - // note: the alphabet has at least size 1, so the following is safe: - auto 
largest_symbol = (--D.end())->first; - bit_vector tmp_char(largest_symbol + 1, 0); - for (const auto& x : D ) { - tmp_char[x.first] = 1; - } - char_bv = tmp_char; +template +bool cyclic_insert_hi(typename t_alphabet_strat::multi_comp_char_type& mc, + typename t_alphabet_strat::char_type c, + const t_alphabet_strat& alphabet) +{ + auto cc = alphabet.char2comp[c]; + if (cc == 0 and c > 0) + return false; +// std::cout<<"mc.mx="<& text_buf, int_vector_size_type len): + char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), sigma_q(m_sigma_q), + sigma_q_1(m_sigma_q_1) + { + m_sigma = 0; + if (0 == len or 0 == text_buf.size()) + return; + assert(len <= text_buf.size()); + // initialize vectors + int_vector<64> D(257, 0); + bit_vector tmp_char(256, 0); + // count occurrences of each symbol +// std::cout<<"text="; + for (size_type i=0; i < len; ++i) { + ++D[text_buf[i]]; +// std::cout<<(char)text_buf[i]; + } +// std::cout<(m_sigma+1, 0, bits::hi(len)+1); + m_C.multi_C = int_vector<>(m_sigma_q+1, 0, bits::hi(len)+1); + + for (int i=(int)m_sigma; i > 0; --i) m_C.C[i] = D[i-1]; + m_C.C[0] = 0; + for (int i=1; i <= (int)m_sigma; ++i) m_C.C[i] = m_C.C[i] + m_C.C[i-1]; + assert(m_C.C[sigma]==len); + m_char = tmp_char; + util::init_support(m_char_rank, &m_char); + util::init_support(m_char_select, &m_char); + if (t_q == 1) { + m_C.multi_C = m_C.C; + } else if (t_q > 1) { + int_vector<64> multi_D(m_sigma_q+1, 0); + // count occurrences of each symbol + uint64_t x = 0; + for (size_type i=0; i &cha * The types to represent `char2comp`, `comp2char`, and `C` can be specified * by template parameters. 
*/ -template -class int_alphabet { -public: - class char2comp_wrapper; - class comp2char_wrapper; - friend class char2comp_wrapper; - friend class comp2char_wrapper; +template +class int_alphabet +{ + public: + class char2comp_wrapper_int; + class comp2char_wrapper_int; + friend class char2comp_wrapper_int; + friend class comp2char_wrapper_int; + + typedef int_vector<>::size_type size_type; + typedef char2comp_wrapper_int char2comp_type; + typedef comp2char_wrapper_int comp2char_type; + typedef C_array_type C_type; + typedef uint64_t sigma_type; + typedef uint64_t char_type; + typedef uint64_t comp_char_type; + typedef std::vector string_type; + typedef int_alphabet_tag alphabet_category; + enum { int_width = 0 }; + + //! Helper class for the char2comp mapping + class char2comp_wrapper_int + { + private: + const int_alphabet* m_strat; + public: + char2comp_wrapper_int(const int_alphabet* strat) : m_strat(strat) {} + comp_char_type operator[](char_type c) const + { + if (m_strat->m_char.size() > 0) { // if alphabet is not continuous + if (c >= m_strat->m_char.size() or !m_strat->m_char[c]) + return (comp_char_type)0; + return (comp_char_type) m_strat->m_char_rank((size_type)c); + } else { // direct map if it is continuous + if (c >= m_strat->m_sigma) + return 0; + return (comp_char_type) c; + } + return 0; + } + }; + + //! 
Helper class for the comp2char mapping + class comp2char_wrapper_int + { + private: + const int_alphabet* m_strat; + public: + comp2char_wrapper_int(const int_alphabet* strat) : m_strat(strat) {} + char_type operator[](comp_char_type c) const + { + if (m_strat->m_char.size() > 0) { // if alphabet is not continuous + return (char_type) m_strat->m_char_select(((size_type)c)+1); + } else { // direct map if it is continuous + return (char_type) c; + } + } + }; + + private: + bit_vector_type m_char; // `m_char[i]` indicates if character with code i is present or not + rank_support_type m_char_rank; // rank data structure for `m_char` to answer char2comp + select_support_type m_char_select; // select data structure for `m_char` to answer comp2char + C_type m_C; // cumulative counts for the compact alphabet [0..sigma] + sigma_type m_sigma; // effective size of the alphabet + + void copy(const int_alphabet& strat) + { + m_char = strat.m_char; + m_char_rank = strat.m_char_rank; + m_char_rank.set_vector(&m_char); + m_char_select = strat.m_char_select; + m_char_select.set_vector(&m_char); + m_C = strat.m_C; + m_sigma = strat.m_sigma; + } - typedef int_vector<>::size_type size_type; - typedef char2comp_wrapper char2comp_type; - typedef comp2char_wrapper comp2char_type; - typedef C_array_type C_type; - typedef uint64_t sigma_type; - typedef uint64_t char_type; - typedef uint64_t comp_char_type; - typedef std::vector string_type; - typedef int_alphabet_tag alphabet_category; - enum { int_width = 0 }; - - //! 
Helper class for the char2comp mapping - class char2comp_wrapper { - private: - const int_alphabet* m_strat; - - public: - char2comp_wrapper(const int_alphabet* strat) : m_strat(strat) {} - comp_char_type operator[](char_type c) const - { - if (m_strat->m_char.size() > 0) { // if alphabet is not continuous - if (c >= m_strat->m_char.size() or !m_strat->m_char[c]) return (comp_char_type)0; - return (comp_char_type)m_strat->m_char_rank((size_type)c); - } else { // direct map if it is continuous - if (c >= m_strat->m_sigma) return 0; - return (comp_char_type)c; - } - return 0; - } - }; - - //! Helper class for the comp2char mapping - class comp2char_wrapper { - private: - const int_alphabet* m_strat; - - public: - comp2char_wrapper(const int_alphabet* strat) : m_strat(strat) {} - char_type operator[](comp_char_type c) const - { - if (m_strat->m_char.size() > 0) { // if alphabet is not continuous - return (char_type)m_strat->m_char_select(((size_type)c) + 1); - } else { // direct map if it is continuous - return (char_type)c; - } - } - }; + //! Check if the alphabet is continuous. + bool is_continuous_alphabet(std::map& D) + { + if (D.size() == 0) { // an empty alphabet is continuous + return true; + } else { + // max key + 1 == size of map + return ((--D.end())->first + 1) == D.size(); + } + } - const char2comp_type char2comp; - const comp2char_type comp2char; - const C_type& C; - const sigma_type& sigma; + public: -private: - bit_vector_type m_char; // `m_char[i]` indicates if character with code i is present or not - rank_support_type m_char_rank; // rank data structure for `m_char` to answer char2comp - select_support_type m_char_select; // select data structure for `m_char` to answer comp2char - C_type m_C; // cumulative counts for the compact alphabet [0..sigma] - sigma_type m_sigma; // effective size of the alphabet - - //! Check if the alphabet is continuous. 
- bool is_continuous_alphabet(std::map& D) - { - if (D.size() == 0) { // an empty alphabet is continuous - return true; - } else { - // max key + 1 == size of map - return ((--D.end())->first + 1) == D.size(); - } - } + const char2comp_type char2comp; + const comp2char_type comp2char; + const C_type& C; + const sigma_type& sigma; -public: - //! Default constructor - int_alphabet() : char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), m_sigma(0) {} + //! Default constructor + int_alphabet() : char2comp(this), comp2char(this), C(m_C), sigma(m_sigma) + { + m_sigma = 0; + } - //! Construct from a byte-stream - /*! + //! Construct from a byte-stream + /*! * \param text_buf Byte stream. * \param len Length of the byte stream. */ @@ -593,7 +915,15 @@ class int_alphabet { if (is_continuous_alphabet(D)) { // do not initialize m_char, m_char_rank and m_char_select since we can map directly } else { - init_char_bitvector(m_char, D); + // note: the alphabet has at least size 1, so the following is safe: + size_type largest_symbol = (--D.end())->first; + bit_vector tmp_char(largest_symbol+1, 0); + for (std::map::const_iterator it = D.begin(), end=D.end(); it != end; ++it) { + tmp_char[it->first] = 1; + } + m_char = tmp_char; + util::init_support(m_char_rank, &m_char); + util::init_support(m_char_select, &m_char); } assert(D.find(0) != D.end() and 1 == D[0]); // null-byte should occur exactly once diff --git a/include/sdsl/csa_bitcompressed.hpp b/include/sdsl/csa_bitcompressed.hpp index e69e84401..72c8f8879 100644 --- a/include/sdsl/csa_bitcompressed.hpp +++ b/include/sdsl/csa_bitcompressed.hpp @@ -215,12 +215,12 @@ class csa_bitcompressed { private: // Calculates how many symbols c are in the prefix [0..i-1] of the BWT of the original text. /* - * \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$. - * \param c The symbol to count the occurrences in the prefix. 
- * \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT. - * \par Time complexity - * \f$ \Order{\log n} \f$ - */ + * \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$. + * \param c The symbol to count the occurrences in the prefix. + * \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT. + * \par Time complexity + * \f$ \Order{\log n} \f$ + */ size_type rank_bwt(size_type i, const char_type c) const { // TODO: special case if c == BWT[i-1] we can use LF to get a constant time answer @@ -244,15 +244,29 @@ class csa_bitcompressed { } } - // Calculates the i-th occurrence of symbol c in the BWT of the original text. - /* - * \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$. - * \param c Character c. - * \returns The i-th occurrence of c in the BWT or size() if c does - * not occur t times in BWT> - * \par Time complexity - * \f$ \Order{t_{\Psi}} \f$ - */ + // Calculates how many symbols c are in the prefix [0..ij[0]-1] and [0..ij[1]-1] of the BWT of the original text. + /* \param ij The exclusive indices of the prefix ranges [0..ij[0]-1] and [0..ij[1]-1] + * \param c The symbol to count + * \returns An array of size two which contains the occurrences of symbol c in the prefixes [0..ij[0]-1] and [0..ij[1]-1] + * \par Time complexity + * \f$ \Order{\log n} \f$ + */ + std::array + rank_bwt(std::array ij, const char_type c)const + { + return {rank_bwt(ij[0], c), rank_bwt(ij[1],c)}; + } + + + // Calculates the i-th occurrence of symbol c in the BWT of the original text. + /* + * \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$. + * \param c Character c.
+ * \returns The i-th occurrence of c in the BWT or size() if c does + * not occur i times in the BWT. + * \par Time complexity + * \f$ \Order{t_{\Psi}} \f$ + */ size_type select_bwt(size_type i, const char_type c) const { comp_char_type cc = char2comp[c]; diff --git a/include/sdsl/csa_sada.hpp b/include/sdsl/csa_sada.hpp index 38c3aedf8..8451c9656 100644 --- a/include/sdsl/csa_sada.hpp +++ b/include/sdsl/csa_sada.hpp @@ -9,6 +9,7 @@ #define INCLUDED_SDSL_CSA_SADA #include "enc_vector.hpp" +#include "enc_vector2.hpp" #include "int_vector.hpp" #include "iterators.hpp" #include "suffix_array_helper.hpp" @@ -36,130 +37,134 @@ namespace sdsl { * \sa sdsl::csa_wt, sdsl::csa_bitcompressed * @ingroup csa */ -template , // Vector type used to store the Psi-function - uint32_t t_dens = 32, // Sample density for suffix array (SA) values - uint32_t t_inv_dens = 64, // Sample density for inverse suffix array (ISA) values - class t_sa_sample_strat = sa_order_sa_sampling<>, // Policy class for the SA sampling. - class t_isa_sample_strat = isa_sampling<>, // Policy class for ISA sampling. - class t_alphabet_strat = - byte_alphabet // Policy class for the representation of the alphabet.
- > -class csa_sada { - static_assert(is_enc_vec::value, - "First template argument has to be of type env_vector."); - static_assert(t_dens > 0, "Second template argument has to be greater then 0."); - static_assert(t_inv_dens > 0, "Third template argument has to be greater then 0."); - static_assert( - std::is_same::type, sa_sampling_tag>::value, - "Forth template argument has to be a suffix array sampling strategy."); - static_assert( - std::is_same::type, isa_sampling_tag>::value, - "Fifth template argument has to be a inverse suffix array sampling strategy."); - static_assert(is_alphabet::value, - "Sixth template argument has to be a alphabet strategy."); - - friend class bwt_of_csa_psi; - -public: - enum { sa_sample_dens = t_dens, isa_sample_dens = t_inv_dens }; - - typedef uint64_t value_type; - typedef random_access_const_iterator const_iterator; - typedef const_iterator iterator; - typedef const value_type const_reference; - typedef const_reference reference; - typedef const_reference* pointer; - typedef const pointer const_pointer; - typedef int_vector<>::size_type size_type; - typedef size_type csa_size_type; - typedef ptrdiff_t difference_type; - typedef t_enc_vec enc_vector_type; - typedef enc_vector_type psi_type; - typedef traverse_csa_psi lf_type; - typedef bwt_of_csa_psi bwt_type; - typedef isa_of_csa_psi isa_type; - typedef text_of_csa text_type; - typedef first_row_of_csa first_row_type; - typedef typename t_sa_sample_strat::template type sa_sample_type; - typedef typename t_isa_sample_strat::template type isa_sample_type; - typedef t_alphabet_strat alphabet_type; - typedef typename alphabet_type::alphabet_category alphabet_category; - typedef typename alphabet_type::comp_char_type comp_char_type; - typedef typename alphabet_type::char_type - char_type; // Note: This is the char type of the CSA not the WT! 
- typedef typename alphabet_type::string_type string_type; - typedef csa_sada csa_type; - - typedef csa_tag index_category; - typedef psi_tag extract_category; - - friend class traverse_csa_psi; - friend class traverse_csa_psi; - - static const uint32_t linear_decode_limit = 100000; - -private: - enc_vector_type m_psi; // psi function - sa_sample_type m_sa_sample; // suffix array samples - isa_sample_type m_isa_sample; // inverse suffix array samples - alphabet_type m_alphabet; // alphabet component - - mutable std::vector m_psi_buf; // buffer for decoded psi values - - void create_buffer() - { - if (enc_vector_type::sample_dens < linear_decode_limit) { - m_psi_buf = std::vector(enc_vector_type::sample_dens + 1); - } - } - -public: - const typename alphabet_type::char2comp_type& char2comp = m_alphabet.char2comp; - const typename alphabet_type::comp2char_type& comp2char = m_alphabet.comp2char; - const typename alphabet_type::C_type& C = m_alphabet.C; - const typename alphabet_type::sigma_type& sigma = m_alphabet.sigma; - const psi_type& psi = m_psi; - const lf_type lf = lf_type(*this); - const bwt_type bwt = bwt_type(*this); - const isa_type isa = isa_type(*this); - const bwt_type L = bwt_type(*this); - const first_row_type F = first_row_type(*this); - const text_type text = text_type(*this); - const sa_sample_type& sa_sample = m_sa_sample; - const isa_sample_type& isa_sample = m_isa_sample; - - - //! Default Constructor - csa_sada() { create_buffer(); } - //! Default Destructor - ~csa_sada() {} - - //! Copy constructor - csa_sada(const csa_sada& csa) - : m_psi(csa.m_psi) - , m_sa_sample(csa.m_sa_sample) - , m_isa_sample(csa.m_isa_sample) - , m_alphabet(csa.m_alphabet) - { - create_buffer(); - m_isa_sample.set_vector(&m_sa_sample); - } - - //! 
Move constructor - csa_sada(csa_sada&& csa) - : m_psi(std::move(csa.m_psi)) - , m_sa_sample(std::move(csa.m_sa_sample)) - , m_isa_sample(std::move(csa.m_isa_sample)) - , m_alphabet(std::move(csa.m_alphabet)) - { - create_buffer(); - m_isa_sample.set_vector(&m_sa_sample); - } - - csa_sada(cache_config& config); - - //! Number of elements in the \f$\CSA\f$. - /*! Required for the Container Concept of the STL. +template, // Vector type used to store the Psi-function + uint32_t t_dens = 32, // Sample density for suffix array (SA) values + uint32_t t_inv_dens = 64, // Sample density for inverse suffix array (ISA) values + class t_sa_sample_strat = sa_order_sa_sampling<>,// Policy class for the SA sampling. + class t_isa_sample_strat= isa_sampling<>, // Policy class for ISA sampling. + class t_alphabet_strat = byte_alphabet // Policy class for the representation of the alphabet. + > +class csa_sada +{ + static_assert(is_enc_vec::value, + "First template argument has to be of type env_vector."); + static_assert(t_dens > 0, + "Second template argument has to be greater then 0."); + static_assert(t_inv_dens > 0, + "Third template argument has to be greater then 0."); + static_assert(std::is_same::type, sa_sampling_tag>::value, + "Forth template argument has to be a suffix array sampling strategy."); + static_assert(std::is_same::type, isa_sampling_tag>::value, + "Fifth template argument has to be a inverse suffix array sampling strategy."); + static_assert(is_alphabet::value, + "Sixth template argument has to be a alphabet strategy."); + + friend class bwt_of_csa_psi; + public: + enum { sa_sample_dens = t_dens, + isa_sample_dens = t_inv_dens + }; + + typedef uint64_t value_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef const value_type const_reference; + typedef const_reference reference; + typedef const_reference* pointer; + typedef const pointer const_pointer; + typedef int_vector<>::size_type size_type; + typedef 
size_type csa_size_type; + typedef ptrdiff_t difference_type; + typedef t_enc_vec enc_vector_type; + typedef enc_vector_type psi_type; + typedef traverse_csa_psi lf_type; + typedef bwt_of_csa_psi bwt_type; + typedef isa_of_csa_psi isa_type; + typedef text_of_csa text_type; + typedef first_row_of_csa first_row_type; + typedef typename t_sa_sample_strat::template type sa_sample_type; + typedef typename t_isa_sample_strat::template type isa_sample_type; + typedef t_alphabet_strat alphabet_type; + typedef typename alphabet_type::alphabet_category alphabet_category; + typedef typename alphabet_type::comp_char_type comp_char_type; + typedef typename alphabet_type::char_type char_type; // Note: This is the char type of the CSA not the WT! + typedef typename alphabet_type::string_type string_type; + typedef csa_sada csa_type; + + typedef csa_tag index_category; + typedef psi_tag extract_category; + + friend class traverse_csa_psi; + friend class traverse_csa_psi; + + static const uint32_t linear_decode_limit = 100000; + private: + enc_vector_type m_psi; // psi function + sa_sample_type m_sa_sample; // suffix array samples + isa_sample_type m_isa_sample; // inverse suffix array samples + alphabet_type m_alphabet; // alphabet component + + mutable std::vector m_psi_buf; // buffer for decoded psi values + + void copy(const csa_sada& csa) + { + m_psi = csa.m_psi; + m_sa_sample = csa.m_sa_sample; + m_isa_sample = csa.m_isa_sample; + m_isa_sample.set_vector(&m_sa_sample); + m_alphabet = csa.m_alphabet; + }; + + void create_buffer() + { + if (enc_vector_type::sample_dens < linear_decode_limit) { + m_psi_buf = std::vector(enc_vector_type::sample_dens+1); + } + } + + public: + const typename alphabet_type::char2comp_type& char2comp = m_alphabet.char2comp; + const typename alphabet_type::comp2char_type& comp2char = m_alphabet.comp2char; + const typename alphabet_type::C_type& C = m_alphabet.C; + const typename alphabet_type::sigma_type& sigma = m_alphabet.sigma; + const 
alphabet_type& alphabet = m_alphabet; + const psi_type& psi = m_psi; + const lf_type lf = lf_type(*this); + const bwt_type bwt = bwt_type(*this); + const isa_type isa = isa_type(*this); + const bwt_type L = bwt_type(*this); + const first_row_type F = first_row_type(*this); + const text_type text = text_type(*this); + const sa_sample_type& sa_sample = m_sa_sample; + const isa_sample_type& isa_sample = m_isa_sample; + + + //! Default Constructor + csa_sada() + { + create_buffer(); + } + //! Default Destructor + ~csa_sada() { } + + //! Copy constructor + csa_sada(const csa_sada& csa) + { + create_buffer(); + copy(csa); + } + + //! Move constructor + csa_sada(csa_sada&& csa) + { + *this = std::move(csa); + } + + csa_sada(cache_config& config); + + //! Number of elements in the \f$\CSA\f$. + /*! Required for the Container Concept of the STL. +>>>>>>> hyb_sd_vector_slow * \sa max_size, empty * \par Time complexity * \f$ \Order{1} \f$ @@ -241,98 +246,217 @@ class csa_sada { */ void load(std::istream& in); - uint32_t get_sample_dens() const { return t_dens; } + // Calculates how many symbols cc are in the prefix [0..i-1] of the BWT of the original text. + /* + * \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$. + * \param cc The compactified symbol to count in the prefix. + * \returns The number of occurrences of the compactified symbol cc in the prefix [0..i-1]. 
+ * \par Time complexity + * \f$ \Order{\log n t_{\Psi}} \f$ + */ + template + size_type rank_comp_bwt(size_type i, const t_char cc)const + { +// std::cout<<"rank_comp_bwt("< search in previous block (s_begin-1) +// std::cout<<"case (1)"<= i) { // now s_begin < s_end + // Case (2): Some samples inside [cc_begin, cc_end) + // and first sample already larger or equal to i + // => search in previous block (s_begin-1) +// std::cout<<"case (2): "<= " << i << std::endl; + } else { // still s_begin < s_end + // Case (3): Some samples inside [cc_begin, cc_end) + // and first sample smaller than i + // => binary search for first sample >= i + s_begin = upper_bound(s_begin, s_end, i-1); + // => search in previous block (s_begin-1) +// std::cout<<"case (3): s_begin = " << s_begin << " (s_end=" << s_end <<" )"<< std::endl; +// std::cout<<">>>>> m_psi.sample(s_begin-1)="< search in previous block (s_begin-1) + answer_j = true; + } else if (m_psi.sample(s_begin) >= i) { // now s_begin < s_end + // Case (2): Some samples inside [cc_begin, cc_end) + // and first sample already larger or equal to i + // => search in previous block (s_begin-1) + answer_j = (m_psi.sample(s_begin) >= j); + } else { // still s_begin < s_end + // Case (3): Some samples inside [cc_begin, cc_end) + // and first sample smaller than i + // => binary search for first sample >= i + s_begin = upper_bound(s_begin, s_end, i-1); + // => search in previous block (s_begin-1) + answer_j = (s_begin == s_end) or (m_psi.sample(s_begin) >=j); + } + s_begin -= 1; + uint64_t smpl = m_psi.sample(s_begin); + + size_t abs_decode_begin = s_begin*sd; + size_t skip = 0; + if (abs_decode_begin < cc_begin) { + skip = cc_begin - abs_decode_begin; + } + size_t res = abs_decode_begin + skip - cc_begin; + + bool uniform_block = (s_begin+1)*sd < m_psi.size() and skip == 0 and smpl+sd == m_psi.sample(s_begin+1); + if (uniform_block) { + if (answer_j) { + return std::make_tuple(res + (i - smpl), res + (j - smpl)); + } else { + return 
std::make_tuple(res + (i - smpl), rank_comp_bwt(j, cc)); + } + } + + uint64_t* p = m_psi_buf.data(); + // extract the psi values between two samples + m_psi.get_inter_sampled_values(s_begin, p); + p = m_psi_buf.data(); + + auto it = p + skip; + for (; (res < cc_end - cc_begin) and it < m_psi_buf.data()+sd; ++it) { + if ((*it)+smpl >= i) { + break; + } + ++res; + } + if (answer_j) { + size_t res2 = res; + for (; (res2 < cc_end - cc_begin) and it < m_psi_buf.data()+sd; ++it) { + if ((*it)+smpl >= j) { + break; + } + ++res2; + } + return std::make_tuple(res, res2); + } + return std::make_tuple(res, rank_comp_bwt(j, cc)); + } private: - // Calculates how many symbols c are in the prefix [0..i-1] of the BWT of the original text. - /* - * \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$. - * \param c The symbol to count the occurrences in the prefix. - * \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT. - * \par Time complexity - * \f$ \Order{\log n t_{\Psi}} \f$ - */ - size_type rank_bwt(size_type i, const char_type c) const - { - comp_char_type cc = char2comp[c]; - if (cc == 0 and c != 0) // character is not in the text => return 0 - return 0; - if (i == 0) return 0; - assert(i <= size()); - - size_type lower_b, upper_b; // lower_b inclusive, upper_b exclusive - - const size_type sd = m_psi.get_sample_dens(); - size_type lower_sb = (C[cc] + sd - 1) / sd; // lower_sb inclusive - size_type upper_sb = (C[cc + 1] + sd - 1) / sd; // upper_sb exclusive - while (lower_sb + 1 < upper_sb) { - size_type mid = (lower_sb + upper_sb) / 2; - if (m_psi.sample(mid) >= i) - upper_sb = mid; - else - lower_sb = mid; - } - - if (lower_sb == upper_sb) { // the interval was smaller than sd - lower_b = C[cc]; - upper_b = C[cc + 1]; - } else if (lower_sb > (C[cc] + sd - 1) / sd) { // main case - // TODO: don't use get_inter_sampled_values if t_dens is really - // large - lower_b = lower_sb * sd; - if (0 == m_psi_buf.size()) { 
- upper_b = std::min(upper_sb * sd, C[cc + 1]); - goto finish; - } - uint64_t* p = m_psi_buf.data(); - // extract the psi values between two samples - m_psi.get_inter_sampled_values(lower_sb, p); - p = m_psi_buf.data(); - uint64_t smpl = m_psi.sample(lower_sb); - // handle border cases - if (lower_b + m_psi.get_sample_dens() >= C[cc + 1]) - m_psi_buf[C[cc + 1] - lower_b] = size() - smpl; - else - m_psi_buf[m_psi.get_sample_dens()] = size() - smpl; - // search the result linear - while ((*p++) + smpl < i) - ; - - return p - 1 - m_psi_buf.data() + lower_b - C[cc]; - } else { // lower_b == (m_C[cc]+sd-1)/sd and lower_sb < upper_sb - if (m_psi.sample(lower_sb) >= i) { - lower_b = C[cc]; - upper_b = lower_sb * sd + 1; - } else { - lower_b = lower_sb * sd; - upper_b = std::min(upper_sb * sd, C[cc + 1]); - } - } - finish: - // binary search the interval [C[cc]..C[cc+1]-1] for the result - // size_type lower_b = m_C[cc], upper_b = m_C[cc+1]; // lower_b inclusive, upper_b exclusive - while (lower_b + 1 < upper_b) { - size_type mid = (lower_b + upper_b) / 2; - if (m_psi[mid] >= i) - upper_b = mid; - else - lower_b = mid; - } - if (lower_b > C[cc]) - return lower_b - C[cc] + 1; - else { // lower_b == m_C[cc] - return m_psi[lower_b] < i; // 1 if m_psi[lower_b] + size_t upper_bound(size_t first, size_t last, V value) const + { + size_t mid; + size_t count, step; + count = last-first; + + while (count > 0) { + mid = first; + step = count / 2; + mid += step; + if (!(value < m_psi.sample(mid))) { + first = ++mid; + count -= step + 1; + } else count = step; + } + return first; + } + + // Calculates how many symbols c are in the prefix [0..i-1] of the BWT of the original text. + /* + * \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$. + * \param c The symbol to count in the prefix. + * \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT. 
+ * \par Time complexity + * \f$ \Order{\log n t_{\Psi}} \f$ + */ + // replace const char_type c by const std::array& c + template + size_type rank_bwt(size_type i, const t_char c)const + { + auto cc = char2comp[c]; + if (cc==0 and c!=0) // character is not in the text => return 0 + return 0; + if (i == 0) + return 0; + return rank_comp_bwt(i, cc); + } + + template + std::array + rank_bwt(std::array ij, const t_char c)const + { + return {rank_bwt(ij[0], c), rank_bwt(ij[1],c)}; + } + + // Calculates the position of the i-th c in the BWT of the original text. + /* + * \param i The i-th occurrence. \f$i\in [1..rank_bwt(size(),c)]\f$. + * \param c Symbol c. + * \returns The position of the i-th c in the BWT or size() if c does occur less then i times. + * \par Time complexity + * \f$ \Order{t_{\Psi}} \f$ + */ size_type select_bwt(size_type i, const char_type c) const { assert(i > 0); @@ -358,52 +482,81 @@ template ::csa_sada( cache_config& config) { - create_buffer(); - if (!cache_file_exists(key_bwt(), config)) { - return; - } - size_type n = 0; - { - int_vector_buffer bwt_buf( - cache_file_name(key_bwt(), config)); - n = bwt_buf.size(); - auto event = memory_monitor::event("construct csa-alpbabet"); - m_alphabet = alphabet_type(bwt_buf, n); - } - { - auto event = memory_monitor::event("sample SA"); - m_sa_sample = sa_sample_type(config); - } - { - auto event = memory_monitor::event("sample ISA"); - isa_sample_type isa_s(config, &m_sa_sample); - util::swap_support(m_isa_sample, isa_s, &m_sa_sample, (const sa_sample_type*)nullptr); - } - // if ( config.delete_files ) { - // remove_from_cache>(conf::KEY_SA, config); - // } - - int_vector<> cnt_chr(sigma, 0, bits::hi(n) + 1); - for (typename alphabet_type::sigma_type i = 0; i < sigma; ++i) { - cnt_chr[i] = C[i]; - } - // calculate psi - { - auto event = memory_monitor::event("construct PSI"); - int_vector_buffer bwt_buf( - cache_file_name(key_bwt(), config)); - std::string psi_file = cache_file_name(conf::KEY_PSI, 
config); - auto psi = write_out_mapper<>::create(psi_file, n, bits::hi(n) + 1); - for (size_type i = 0; i < n; ++i) { - psi[cnt_chr[char2comp[bwt_buf[i]]]++] = i; - } - register_cache_file(conf::KEY_PSI, config); - } - { - auto event = memory_monitor::event("encode PSI"); - int_vector_buffer<> psi_buf(cache_file_name(conf::KEY_PSI, config)); - m_psi = t_enc_vec(psi_buf); - } + create_buffer(); + if (!cache_file_exists(key_bwt(), config)) { + return; + } + int_vector_buffer bwt_buf(cache_file_name(key_bwt(),config)); + size_type n = bwt_buf.size(); + { + auto event = memory_monitor::event("construct csa-alpbabet"); +// alphabet_type tmp_alphabet(bwt_buf, n); // TODO: maybe it is possible to use _buf_buf again for multibyte!! + int_vector_buffer text_buf(cache_file_name(key_text(),config)); + m_alphabet = alphabet_type(text_buf, n); + } + + int_vector<> cnt_chr(sigma, 0, bits::hi(n)+1); + for (typename alphabet_type::sigma_type i=0; i < sigma; ++i) { + cnt_chr[i] = C[i]; + } + // calculate psi + { + auto event = memory_monitor::event("construct PSI"); + // TODO: move PSI construct into construct_PSI.hpp + int_vector<> psi(n, 0, bits::hi(n)+1); + for (size_type i=0; i < n; ++i) { + psi[ cnt_chr[ char2comp[bwt_buf[i]] ]++ ] = i; + } + std::string psi_file = cache_file_name(conf::KEY_PSI, config); + if (!store_to_cache(psi, conf::KEY_PSI, config)) { + return; + } + } + { + auto event = memory_monitor::event("encode PSI"); + int_vector_buffer<> psi_buf(cache_file_name(conf::KEY_PSI, config)); + m_psi = t_enc_vec(psi_buf); + /* + enc_vector m_psi_check(psi_buf); + if ( m_psi_check.size() != m_psi.size() ){ + std::cout<<"m_psi.size()="< buf1 = std::vector(enc_vector_type::sample_dens+1); + std::vector buf2 = std::vector(enc_vector_type::sample_dens+1); + + std::cout<<"m_psi.size()="< +#include +#include +#include // for strlen +#include +#include + + + +namespace sdsl +{ + +template +class uef_psi_support +{ + public: + typedef typename bit_vector::size_type size_type; + 
typedef size_type value_type; + typedef typename t_csa::alphabet_type alphabet_type; + typedef typename alphabet_type::comp_char_type comp_char_type; + typedef typename alphabet_type::C_type C_type; + typedef random_access_const_iterator iterator; + typedef iterator const_iterator; + typedef const value_type reference; + typedef const value_type const_reference; + typedef const value_type* const_pointer; + typedef ptrdiff_t difference_type; + typedef csa_member_tag category; + typedef int_alphabet_tag alphabet_category; + typedef wt_huff_int, + select_support_scan<1>, + select_support_scan<0>> sml_wt_type; + + private: + std::vector m_inc_seq; + std::vector m_inc_seq_rank; + std::vector m_inc_seq_sel; + bit_vector m_sml; // indicates if a context is small or large + rank_support_v5<> m_sml_rank; // rank for m_sml + sml_wt_type m_sml_wt; // wt to get rank to index into + std::vector> m_sml_inc_seq; // small sequences + + const t_csa* m_csa; + + void set_inc_seq_rank_select() + { + for (size_t i=0; i& psi_buf, const t_csa* csa) + { + set_vector(csa); + const auto& C = m_csa->C; + + m_sml = bit_vector(C.size()-1,0); + const auto threshold = t_hyb_vec::block_size; +// (1) Determine the number of small blocks + for (size_t i=0; i sml(sigma_small, 0, bits::hi(threshold)+1); + +// (2) Create a vector containing only the small context sizes + for (size_t i=0, ii=0; i(size, 0, bits::hi(m_csa->size())+1); + } + +// (5) Initialize m_inc_seq (to store the larger contexts) + m_inc_seq.resize(sigma_large); + m_inc_seq_rank.resize(sigma_large); + m_inc_seq_sel.resize(sigma_large); + for (size_t i=0,i0=0,i1=0; i v(C[i+1]-C[i]); + for (size_t j=C[i]; jC[cc+1] - m_csa->C[cc]; // context size + auto rank = m_sml_wt.rank(cc_sml, cs); + size_type begin = rank*cs; + for (size_t j=0; j= i) + return j; + } + return cs; + } else { +// std::cout<<"single_rank: for i="< rank(std::array ij, comp_char_type cc) const + { + if (m_sml[cc]) { + auto cc_sml = m_sml_rank(cc); + size_type cs = 
m_csa->C[cc+1] - m_csa->C[cc]; // context size + auto rnk = m_sml_wt.rank(cc_sml, cs); + size_type begin = rnk*cs; + std::array res = {{0,0}}; + size_t j=0; + for (size_t k=0; k<2; ++k) { + while (j < cs and m_sml_inc_seq[cs][begin+j] < ij[k]) { + ++j; + } + res[k] = j; + } +// std::array res2 = {rank(ij[0],cc),rank(ij[1],cc)}; +// if ( res != res2 ){ +// std::cout<<"double rank: res=["< res2 = {rank(ij[0],cc),rank(ij[1],cc)}; +// std::cout<<"_double rank: res=["<C[cc+1] - m_csa->C[cc]; // context size + auto rank = m_sml_wt.rank(cc_sml, cs); + return m_sml_inc_seq[cs][rank*cs+(i-1)]; + } else { + size_type cc_large = cc - m_sml_rank(cc); + return m_inc_seq_sel[cc_large](i); + } + } + + value_type operator[](const size_type i) const + { +// std::cout<<"call::psi["<C.begin(), m_csa->C.end(),i) - m_csa->C.begin() - 1; +// std::cout<<"cc="<C[cc]; +// std::cout<<"cum_sum="<, // Vector type used to store the Psi-function + uint32_t t_dens = 32, // Sample density for suffix array (SA) values + uint32_t t_inv_dens = 64, // Sample density for inverse suffix array (ISA) values + class t_sa_sample_strat = sa_order_sa_sampling<>,// Policy class for the SA sampling. + class t_isa_sample_strat= isa_sampling<>, // Policy class for ISA sampling. + class t_alphabet_strat = byte_alphabet // Policy class for the representation of the alphabet. 
+ > +class csa_sada2 +{ + static_assert(t_dens > 0, + "Second template argument has to be greater then 0."); + static_assert(t_inv_dens > 0, + "Third template argument has to be greater then 0."); + static_assert(std::is_same::type, sa_sampling_tag>::value, + "Forth template argument has to be a suffix array sampling strategy."); + static_assert(std::is_same::type, isa_sampling_tag>::value, + "Fifth template argument has to be a inverse suffix array sampling strategy."); + static_assert(is_alphabet::value, + "Sixth template argument has to be a alphabet strategy."); + + friend class bwt_of_csa_psi; + public: + enum { sa_sample_dens = t_dens, + isa_sample_dens = t_inv_dens + }; + + typedef uint64_t value_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef const value_type const_reference; + typedef const_reference reference; + typedef const_reference* pointer; + typedef const pointer const_pointer; + typedef int_vector<>::size_type size_type; + typedef size_type csa_size_type; + typedef ptrdiff_t difference_type; + typedef traverse_csa_psi lf_type; + typedef bwt_of_csa_psi bwt_type; + typedef isa_of_csa_psi isa_type; + typedef text_of_csa text_type; + typedef first_row_of_csa first_row_type; + typedef typename t_sa_sample_strat::template type sa_sample_type; + typedef typename t_isa_sample_strat::template type isa_sample_type; + typedef t_alphabet_strat alphabet_type; + typedef typename alphabet_type::alphabet_category alphabet_category; + typedef typename alphabet_type::comp_char_type comp_char_type; + typedef typename alphabet_type::char_type char_type; // Note: This is the char type of the CSA not the WT! 
+ typedef typename alphabet_type::string_type string_type; + typedef csa_sada2 csa_type; + + typedef csa_tag index_category; + typedef psi_tag extract_category; + typedef uef_psi_support psi_type; + + friend class traverse_csa_psi; + friend class traverse_csa_psi; + + private: + alphabet_type m_alphabet; // alphabet component + psi_type m_psi_support; // psi function + sa_sample_type m_sa_sample; // suffix array samples + isa_sample_type m_isa_sample; // inverse suffix array samples + + public: + const typename alphabet_type::char2comp_type& char2comp = m_alphabet.char2comp; + const typename alphabet_type::comp2char_type& comp2char = m_alphabet.comp2char; + const typename alphabet_type::C_type& C = m_alphabet.C; + const typename alphabet_type::sigma_type& sigma = m_alphabet.sigma; + const alphabet_type& alphabet = m_alphabet; + const psi_type& psi = m_psi_support; + const lf_type lf = lf_type(*this); + const bwt_type bwt = bwt_type(*this); + const isa_type isa = isa_type(*this); + const bwt_type L = bwt_type(*this); + const first_row_type F = first_row_type(*this); + const text_type text = text_type(*this); + const sa_sample_type& sa_sample = m_sa_sample; + const isa_sample_type& isa_sample = m_isa_sample; + + + //! Default Constructor + csa_sada2() { m_psi_support.set_vector(this); } + //! Default Destructor + ~csa_sada2() { } + + //! Copy constructor + csa_sada2(const csa_sada2& csa) + { + *this = csa; + } + + //! Move constructor + csa_sada2(csa_sada2&& csa) + { + *this = std::move(csa); + } + + csa_sada2(cache_config& config); + + //! Number of elements in the \f$\CSA\f$. + /*! Required for the Container Concept of the STL. + * \sa max_size, empty + * \par Time complexity + * \f$ \Order{1} \f$ + */ + size_type size()const + { + return C.size() > 0 ? C[C.size()-1] : 0; + } + + //! Returns the largest size that csa_sada2 can ever have. + /*! Required for the Container Concept of the STL. 
+ * \sa size + */ + static size_type max_size() + { + return int_vector<>::max_size(); + } + + //! Returns if the data strucutre is empty. + /*! Required for the Container Concept of the STL.A + * \sa size + */ + bool empty()const + { + return 0==size(); + } + + //! Returns a const_iterator to the first element. + /*! Required for the STL Container Concept. + * \sa end + */ + const_iterator begin()const + { + return const_iterator(this, 0); + } + + //! Returns a const_iterator to the element after the last element. + /*! Required for the STL Container Concept. + * \sa begin. + */ + const_iterator end()const + { + return const_iterator(this, size()); + } + + //! []-operator + /*! \param i Index of the value. \f$ i \in [0..size()-1]\f$. + * Required for the STL Random Access Container Concept. + * \par Time complexity + * \f$ \Order{s_{SA}\cdot t_{\Psi}} \f$, where every \f$s_{SA}\f$th suffix array entry is sampled and \f$t_{\Psi}\f$ + * is the access time for an element in the \f$\Psi\f$-function. + */ + value_type operator[](size_type i)const + { +// std::cout<<"SA["< of unsigned integers + * \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$. + * \param c The symbol to count in the prefix. + * \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT. + * \par Time complexity + * \f$ \Order{\log n t_{\Psi}} \f$ + */ + // replace const char_type c by const std::array& c + template + t_pos rank_bwt(t_pos i, const t_char c)const + { + auto cc = char2comp[c]; + if (cc==0 and c!=0) // character is not in the text => return 0 + return t_pos {0}; + if (i == t_pos {0}) + return t_pos {0}; + return m_psi_support.rank(i, cc); + } + + + // Calculates the position of the i-th c in the BWT of the original text. + /* + * \param i The i-th occurrence. \f$i\in [1..rank_bwt(size(),c)]\f$. + * \param c Symbol c. + * \returns The position of the i-th c in the BWT or size() if c does occur less then i times. 
+ * \par Time complexity + * \f$ \Order{t_{\Psi}} \f$ + */ + size_type select_bwt(size_type i, const char_type c)const + { + assert(i > 0); + comp_char_type cc = char2comp[c]; + if (cc==0 and c!=0) // character is not in the text => return 0 + return size(); + return m_psi_support.select(i, cc); + } +}; + +// == template functions == + +template +csa_sada2::csa_sada2(cache_config& config) +{ + if (!cache_file_exists(key_bwt(), config)) { + return; + } + int_vector_buffer bwt_buf(cache_file_name(key_bwt(),config)); + size_type n = bwt_buf.size(); + { + auto event = memory_monitor::event("construct csa-alpbabet"); +// alphabet_type tmp_alphabet(bwt_buf, n); // TODO: maybe it is possible to use _buf_buf again for multibyte!! + int_vector_buffer text_buf(cache_file_name(key_text(),config)); + m_alphabet = alphabet_type(text_buf, n); + } + + int_vector<> cnt_chr(sigma, 0, bits::hi(n)+1); + for (typename alphabet_type::sigma_type i=0; i < sigma; ++i) { + cnt_chr[i] = C[i]; + } + // calculate psi + { + auto event = memory_monitor::event("construct PSI"); + int_vector<> psi(n, 0, bits::hi(n)+1); + for (size_type i=0; i < n; ++i) { + psi[ cnt_chr[ char2comp[bwt_buf[i]] ]++ ] = i; + } + std::string psi_file = cache_file_name(conf::KEY_PSI, config); + if (!store_to_cache(psi, conf::KEY_PSI, config)) { + return; + } + } + { + auto event = memory_monitor::event("encode PSI"); + int_vector_buffer<> psi_buf(cache_file_name(conf::KEY_PSI, config)); + m_psi_support = psi_type(psi_buf, this); + } + { + auto event = memory_monitor::event("sample SA"); + m_sa_sample = sa_sample_type(config); + } + { + auto event = memory_monitor::event("sample ISA"); + isa_sample_type isa_s(config, &m_sa_sample); + util::swap_support(m_isa_sample, isa_s, &m_sa_sample, (const sa_sample_type*)nullptr); + } +} + +} // end namespace sdsl +#endif diff --git a/include/sdsl/csa_wt.hpp b/include/sdsl/csa_wt.hpp index 4a816bc6d..49a23ccef 100644 --- a/include/sdsl/csa_wt.hpp +++ b/include/sdsl/csa_wt.hpp @@ 
-238,22 +238,32 @@ class csa_wt { private: // Calculates how many symbols c are in the prefix [0..i-1] of the BWT of the original text. /* - * \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$. - * \param c The symbol to count the occurrences in the prefix. - * \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT. - * \par Time complexity - * \f$ \Order{\log |\Sigma|} \f$ - */ - size_type rank_bwt(size_type i, const char_type c) const { return m_wavelet_tree.rank(i, c); } - - // Calculates the position of the i-th c in the BWT of the original text. - /* - * \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$. - * \param c Symbol c. - * \returns The position of the i-th c in the BWT or size() if c does occur less then i times. - * \par Time complexity - * \f$ \Order{t_{\Psi}} \f$ - */ + * \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$. + * \param c The symbol to count the occurrences in the prefix. + * \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT. + * \par Time complexity + * \f$ \Order{\log |\Sigma|} \f$ + */ + size_type + rank_bwt(size_type i, const char_type c)const + { + return m_wavelet_tree.rank(i, c); + } + + std::array + rank_bwt(std::array ij, const char_type c)const + { + return {rank_bwt(ij[0], c), rank_bwt(ij[1],c)}; + } + + // Calculates the position of the i-th c in the BWT of the original text. + /* + * \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$. + * \param c Symbol c. + * \returns The position of the i-th c in the BWT or size() if c does occur less then i times. 
+ * \par Time complexity + * \f$ \Order{t_{\Psi}} \f$ + */ size_type select_bwt(size_type i, const char_type c) const { assert(i > 0); diff --git a/include/sdsl/enc_vector.hpp b/include/sdsl/enc_vector.hpp index 6f8db430d..cc6c4eae1 100644 --- a/include/sdsl/enc_vector.hpp +++ b/include/sdsl/enc_vector.hpp @@ -258,56 +258,56 @@ template template enc_vector::enc_vector(int_vector_buffer& v_buf) { - // clear bit_vectors - clear(); - size_type n = v_buf.size(); - if (n == 0) // if c is empty there is nothing to do... - return; - value_type v1 = 0, v2 = 0, max_sample_value = 0; - size_type samples = 0, z_size = 0; - const size_type sd = get_sample_dens(); - // (1) Calculate maximal value of samples and of deltas - for (size_type i = 0, no_sample = 0; i < n; ++i, --no_sample) { - v2 = v_buf[i]; - if (!no_sample) { // is sample - no_sample = sd; - if (max_sample_value < v2) max_sample_value = v2; - ++samples; - } else { - z_size += t_coder::encoding_length(v2 - v1); - } - v1 = v2; - } - - // (2) Write sample values and deltas - // (a) Initialize array for sample values and pointers - if (max_sample_value > z_size + 1) - m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1); - else - m_sample_vals_and_pointer.width(bits::hi(z_size + 1) + 1); - m_sample_vals_and_pointer.resize(2 * samples + 2); // add 2 for last entry - util::set_to_value(m_sample_vals_and_pointer, 0); - - // (b) Initilize bit_vector for encoded data - m_z = int_vector<>(z_size, 0, 1); - uint64_t* z_data = t_coder::raw_data(m_z); - uint8_t offset = 0; - - // (c) Write sample values and deltas - z_size = 0; - for (size_type i = 0, j = 0, no_sample = 0; i < n; ++i, --no_sample) { - v2 = v_buf[i]; - if (!no_sample) { // is sample - no_sample = sd; - m_sample_vals_and_pointer[j++] = v2; // write samples - m_sample_vals_and_pointer[j++] = z_size; // write pointers - } else { - z_size += t_coder::encoding_length(v2 - v1); - t_coder::encode(v2 - v1, z_data, offset); // write encoded values - } - v1 = 
v2; - } - m_size = n; + // clear bit_vectors + clear(); + size_type n = v_buf.size(); + if (n == 0) // if c is empty there is nothing to do... + return; + value_type v1=0, v2=0, max_sample_value=0; + size_type samples=0, z_size=0; + const size_type sd = get_sample_dens(); +// (1) Calculate maximal value of samples and of deltas + for (size_type i=0, no_sample = 0; i < n; ++i, --no_sample) { + v2 = v_buf[i]; + if (!no_sample) { // is sample + no_sample = sd; + if (max_sample_value < v2) max_sample_value = v2; + ++samples; + } else { + z_size += t_coder::encoding_length(v2-v1); + } + v1 = v2; + } + +// (2) Write sample values and deltas +// (a) Initialize array for sample values and pointers + if (max_sample_value > z_size+1) + m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1); + else + m_sample_vals_and_pointer.width(bits::hi(z_size+1) + 1); + m_sample_vals_and_pointer.resize(2*samples+2); // add 2 for last entry + util::set_to_value(m_sample_vals_and_pointer, 0); + +// (b) Initilize bit_vector for encoded data + m_z = int_vector<>(z_size, 0, 1); + uint64_t* z_data = t_coder::raw_data(m_z); + uint8_t offset = 0; + +// (c) Write sample values and deltas + z_size = 0; + for (size_type i=0, j=0, no_sample = 0; i < n; ++i, --no_sample) { + v2 = v_buf[i]; + if (!no_sample) { // is sample + no_sample = sd; + m_sample_vals_and_pointer[j++] = v2; // write samples + m_sample_vals_and_pointer[j++] = z_size;// write pointers + } else { + z_size += t_coder::encoding_length(v2-v1); + t_coder::encode(v2-v1, z_data, offset); // write encoded values + } + v1 = v2; + } + m_size = n; } template diff --git a/include/sdsl/enc_vector2.hpp b/include/sdsl/enc_vector2.hpp new file mode 100644 index 000000000..89a08acb4 --- /dev/null +++ b/include/sdsl/enc_vector2.hpp @@ -0,0 +1,316 @@ +// Copyright (c) 2016, the SDSL Project Authors. All rights reserved. +// Please see the AUTHORS file for details. 
Use of this source code is governed +// by a BSD license that can be found in the LICENSE file. +/*! \file enc_vector2.hpp + \brief enc_vector2.hpp contains the sdsl::enc_vector2 class. + \author Simon Gog +*/ +#ifndef SDSL_ENC_VECTORII +#define SDSL_ENC_VECTORII + +#include "int_vector.hpp" +#include "coder.hpp" +#include "iterators.hpp" + + +//! Namespace for the succinct data structure library. +namespace sdsl +{ + +template +struct enc_vector2_trait { + typedef int_vector<0> int_vector_type; +}; + +template<> +struct enc_vector2_trait<32> { + typedef int_vector<32> int_vector_type; +}; + +template<> +struct enc_vector2_trait<64> { + typedef int_vector<64> int_vector_type; +}; + +//! A generic immutable space-saving vector class for unsigned integers. +/*! A vector v is stored more space-efficiently by self-delimiting coding + * the deltas v[i+1]-v[i] (v[-1]:=0). Space of the structure and random + * access time to it can be controlled by a sampling parameter t_dens. + * + * \tparam t_coder Self-delimiting coder. + * \tparam t_dens Every t_dens-th element of v is sampled. + * \tparam t_width Width of the int_vector used to store the samples and pointers. + * This class is a parameter of csa_sada. 
+ * @ingroup int_vector + */ +template +class enc_vector2 +{ + private: + static_assert(t_dens > 1 , "enc_vector2: sample density must be larger than `1`"); + public: + typedef uint64_t value_type; + typedef random_access_const_iterator iterator; + typedef iterator const_iterator; + typedef const value_type reference; + typedef const value_type const_reference; + typedef const value_type* const_pointer; + typedef ptrdiff_t difference_type; + typedef int_vector<>::size_type size_type; + typedef t_coder coder; + typedef typename enc_vector2_trait::int_vector_type int_vector_type; + typedef iv_tag index_category; + static constexpr uint32_t sample_dens = t_dens; + typedef enc_vector2 enc_vec_type; + + int_vector<0> m_z; // storage for encoded deltas + private: + int_vector_type m_samples; // samples + sd_vector<> m_pointers; + sd_vector<>::select_1_type m_pointers_sel; + size_type m_size = 0; // number of vector elements + + void clear() + { + m_z.resize(0); + m_size = 0; + m_samples.resize(0); + m_pointers = sd_vector<>(); + } + + public: + enc_vector2() = default; + enc_vector2(const enc_vector2&) = default; + enc_vector2(enc_vector2&&) = default; + enc_vector2& operator=(const enc_vector2&) = default; + enc_vector2& operator=(enc_vector2&&) = default; + + //! Constructor for a Container of unsigned integers. + /*! \param c A container of unsigned integers. + */ + template + enc_vector2(const Container& c); + + //! Constructor for an int_vector_buffer of unsigned integers. + /* + \param v_buf A int_vector_buf. + */ + template + enc_vector2(int_vector_buffer& v_buf); + + //! Default Destructor + ~enc_vector2() { } + + //! The number of elements in the enc_vector2. + size_type size()const + { + return m_size; + } + + //! Return the largest size that this container can ever have. + static size_type max_size() + { + return int_vector<>::max_size()/2; + } + + //! Returns if the enc_vector2 is empty. + bool empty() const + { + return 0==m_size; + } + + //! 
Iterator that points to the first element of the enc_vector2. + const const_iterator begin()const + { + return const_iterator(this, 0); + } + + //! Iterator that points to the position after the last element of the enc_vector2. + const const_iterator end()const + { + return const_iterator(this, this->m_size); + } + + //! operator[] + /*! \param i Index. \f$ i \in [0..size()-1]\f$. + */ + value_type operator[](size_type i)const; + + //! Serialize the enc_vector2 to a stream. + /*! \param out Out stream to write the data structure. + \return The number of written bytes. + */ + size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const; + + //! Load the enc_vector2 from a stream. + void load(std::istream& in); + + //! Returns the i-th sample of enc_vector2 + /*! \param i The index of the sample. 0 <= i < size()/get_sample_dens() + * \return The value of the i-th sample. + */ + value_type sample(const size_type i) const; + + uint32_t get_sample_dens() const + { + return t_dens; + } + + /*! + * \param i The index of the sample for which all values till the next sample should be decoded. 
0 <= i < size()/get_sample_dens() + * \param it A pointer to a uint64_t vector, whereto the values should be written + */ + void get_inter_sampled_values(const size_type i, uint64_t* it)const + { + // TODO: this will not work for blocks with m_pointers_sel(i+1)+t_dens==m_pointers_sel(i+2) + *(it++) = 0; + if (i*t_dens + t_dens - 1 < size()) { + if (i+1 < m_samples.size() and m_samples[i] + t_dens == m_samples[i+1]) { + if (m_pointers_sel(i+1) != m_pointers_sel(i+2)) { + throw std::logic_error("Should not be here"); + } + uint64_t x = 1; + while (x < t_dens) { + *(it++) = x; + ++x; + } +// throw std::logic_error("Should not be here"); + } else { + t_coder::template decode(m_z.data(), m_pointers_sel(i+1), t_dens - 1, it); + } + } else { + assert(i*t_dens < size()); + t_coder::template decode(m_z.data(), m_pointers_sel(i+1), size()-i*t_dens - 1, it); + } + }; +}; + +template +inline typename enc_vector2::value_type enc_vector2::operator[](const size_type i)const +{ + assert(i+1 != 0); + assert(i < m_size); + size_type idx = i/get_sample_dens(); + if (idx+1 < m_samples.size() and m_samples[idx]+t_dens == m_samples[idx+1]) { + return m_samples[idx] + i-t_dens*idx; + } + return m_samples[idx] + t_coder::decode_prefix_sum(m_z.data(), m_pointers_sel(idx+1), i-t_dens*idx); +} + +template +inline typename enc_vector2::value_type enc_vector2::sample(const size_type i)const +{ + assert(i*get_sample_dens()+1 != 0); + assert(i*get_sample_dens() < m_size); + return m_samples[i]; +} + +template +template +enc_vector2::enc_vector2(int_vector_buffer& v_buf) +{ + // clear bit_vectors + clear(); + size_type n = v_buf.size(); + if (n == 0) // if c is empty there is nothing to do... 
+ return; + value_type v1=0, v2=0, max_sample_value=0; + size_type samples=0, z_size=0; + const size_type sd = get_sample_dens(); + size_type tmp_z = 0; + bool uniform = true; +// (1) Calculate maximal value of samples and of deltas + for (size_type i=0, no_sample = 0; i < n; ++i, --no_sample) { + v2 = v_buf[i]; + if (!no_sample) { // is sample + uniform &= (v2==v1+1); + if (!uniform) { + z_size += tmp_z; + } + uniform = true; + tmp_z = 0; + no_sample = sd; + if (max_sample_value < v2) max_sample_value = v2; + ++samples; + } else { + uniform &= (v2==v1+1); + tmp_z += t_coder::encoding_length(v2-v1); + } + v1 = v2; + } + z_size += tmp_z; + +// (2) Write sample values and deltas +// (a) Initialize array for sample values and pointers + m_samples = int_vector<>(samples+1, 0, bits::hi(max_sample_value)+1); + + sd_vector_builder builder(z_size, samples); + +// (b) Initilize bit_vector for encoded data + m_z = int_vector<>(z_size, 0, 1); + uint64_t* z_data = t_coder::raw_data(m_z); + uint8_t offset = 0; + +// (c) Write sample values and deltas + z_size = 0; + tmp_z = 0; + uniform = true; + std::vector delta; + for (size_type i=0, j=0, no_sample = 0; i < n; ++i, --no_sample) { + v2 = v_buf[i]; + if (!no_sample) { // is sample + uniform &= (v2==v1+1); + if (!uniform) { + for (size_t k=0; k(builder); + m_pointers_sel.set_vector(&m_pointers); +} + +template +enc_vector2<>::size_type enc_vector2::serialize(std::ostream& out, structure_tree_node* v, std::string name)const +{ + structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += m_z.serialize(out, child, "encoded deltas"); + written_bytes += m_samples.serialize(out, child, "samples"); + written_bytes += m_pointers.serialize(out, child, "pointers"); + written_bytes += m_pointers_sel.serialize(out, child, "pointers_sel"); + structure_tree::add_size(child, written_bytes); + return 
written_bytes; +} + +template +void enc_vector2::load(std::istream& in) +{ + read_member(m_size, in); + m_z.load(in); + m_samples.load(in); + m_pointers.load(in); + m_pointers_sel.load(in); + m_pointers_sel.set_vector(&m_pointers); +} + +} // end namespace sdsl +#endif diff --git a/include/sdsl/hyb_sd_vector.hpp b/include/sdsl/hyb_sd_vector.hpp new file mode 100644 index 000000000..2b78de755 --- /dev/null +++ b/include/sdsl/hyb_sd_vector.hpp @@ -0,0 +1,1426 @@ +// Copyright (c) 2016, the SDSL Project Authors. All rights reserved. +// Please see the AUTHORS file for details. Use of this source code is governed +// by a BSD license that can be found in the LICENSE file. +/*!\file sd_vector.hpp + \brief sd_vector.hpp contains the sdsl::sd_vector class, and + classes which support rank and select for sd_vector. + \author Simon Gog, Matthias Petri +*/ +#ifndef INCLUDED_SDSL_HYB_SD_VECTOR +#define INCLUDED_SDSL_HYB_SD_VECTOR + +#include "int_vector.hpp" +#include "sd_vector.hpp" +#include "coder.hpp" +#include "util.hpp" +#include "iterators.hpp" + +//! Namespace for the succinct data structure library +namespace sdsl +{ + +template +std::string print_vec(t_itr beg, t_itr end) +{ + std::string str = "["; + auto itr = beg; + while (itr != (end - 1)) { + str += std::to_string(*itr) + " "; + ++itr; + } + str += std::to_string(*itr) + "]"; + return str; +} + +inline uint64_t next0(const uint64_t* word, uint64_t idx) +{ + word += (idx >> 6); + auto masked_inverse_word = ~(*word | bits::lo_set[(idx & 0x3F) + 1]); + if (masked_inverse_word) { + return (idx & ~((size_t)0x3F)) + bits::lo(masked_inverse_word); + } + idx = (idx & ~((size_t)0x3F)) + 64; + ++word; + while (*word == 0xFFFFFFFFFFFFFFFFULL) { + idx += 64; + ++word; + } + return idx + bits::lo(~(*word)); +} + +/*! 
+ * \param word Beginning of bit_vector (represented as sequence of uint64_t words) + * \param idx Initial scanning position (in bits) + * \param i i + * \return The number of set bits up to position i (exlusive) + */ +template +inline uint64_t cnt(const uint64_t* word, uint64_t idx, uint64_t i) +{ +// std::cout<<"cnt("<> 6); + auto offset = idx & 0x3F; +// std::cout<<"offset="<> offset; + uint64_t pre_considered = 0; + uint64_t considered = 64 - offset; +// std::cout<<"considered="<= t_block_size) { + return t_block_size; + } + pre_considered = considered; + considered += 64; + w = *(++word); + } + +//std::cout<<"considered="<> 6); + auto offset = idx & 0x3F; + uint64_t w = (~(*word)) >> offset; + uint64_t considered = 64 - offset; + uint64_t res = 0; + uint64_t cnt = 0; + uint64_t word_cnt = bits::cnt(w); + + while (cnt + word_cnt < i) { + cnt += word_cnt; + res = considered; + considered += 64; + w = (~(*(++word))); + word_cnt = bits::cnt(w); + } + // cnt < i and cnt+word_cnt >= i + // add select (i-cnt) to res + res += bits::sel(w, i - cnt); + return res; +} + +template +class hyb_sd_block_bv +{ + public: + typedef bit_vector::size_type size_type; + typedef size_type value_type; + + public: + static size_type estimate_size(size_type u) + { + return u; + } + static size_type + serialize(bit_vector& bv, size_type offset, int_vector<64>& data, size_type) + { + for (size_t i = 0; i < data[t_block_size - 1]+1; ++i) + bv[offset + i] = 0; + for (size_type i = 0; i < t_block_size; ++i) { + bv[offset + data[i]] = 1; + } + return data[t_block_size - 1] + 1; + } + + static size_type select_1(const bit_vector& bv, size_type offset, size_type i, size_type) + { + return sel(bv.data(), offset, i + 1) - offset; + } + + static size_type + rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, size_type i, size_type) + { + auto offset = block_start[block_id]; + auto next_offset = block_start[block_id+1]; + if (i > next_offset-offset) + return 
t_block_size; + return cnt(bv.data(), offset, i); + } + + static std::array + rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, const std::array& ij, size_type) + { + auto offset = block_start[block_id]; + auto next_offset = block_start[block_id+1]; + if (ij[0] > next_offset-offset) { + return {{t_block_size,t_block_size}}; + } + auto resi = cnt(bv.data(), offset, ij[0]); + if (ij[1] > next_offset-offset) { + return {resi, t_block_size}; + } + return {resi, resi+cnt(bv.data(), offset+ij[0], ij[1]-ij[0])}; + } + +}; + + +template +class hyb_sd_block_rl +{ + public: + typedef bit_vector::size_type size_type; + typedef size_type value_type; + + public: + static size_type encode(int_vector<64>& data, bit_vector* bv=nullptr, size_type offset=0) + { + uint64_t* data_ptr = nullptr; + uint8_t in_word_offset = offset % 64; + if (bv != nullptr) { + data_ptr = bv->data() + (offset / 64); + } + auto do_encode = [&](uint64_t x) { + if (data_ptr != nullptr) { + t_coder::encode(x, data_ptr, in_word_offset); + } + return t_coder::encoding_length(x); + }; + + size_type rl_bits = 0; + size_t begin = 0, end = 1; + while (end < data.size()) { + uint64_t delta = data[end]-data[begin]; + if (delta > end-begin) { + if (end-begin == 1) { + rl_bits += do_encode(delta); + } else { // end-begin > 1 + rl_bits += do_encode(1); + rl_bits += do_encode(end-1-begin); + rl_bits += do_encode(data[end]-data[end-1]); + } + begin = end; + ++end; + } else { + ++end; + } + } + if (end-begin > 1) { +// rl_bits += do_encode(1); +// rl_bits += do_encode(end-1-begin); + } + /* + if ( bv!=nullptr ) { + // std::cout<<"Checking block "< decode(const uint64_t* data_ptr, uint8_t offset, const uint64_t* data_ptr_end, uint8_t offset_end) + { + int_vector<64> data(t_block_size, 0); + size_t pos = 1; // data[0]=0, now decode for pos > 0 + while (pos < t_block_size) { + if (data_ptr > data_ptr_end or (data_ptr == data_ptr_end and offset >= offset_end)) { +// std::cout<<"entering corner 
case"<& block_start, size_type block_id, size_type i, size_type) +// rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, size_type i, size_type) + { + auto offset_begin = block_start[block_id]; + auto offset_end = block_start[block_id+1]; + auto data = decode(bv.data()+(offset_begin/64), offset_begin%64, bv.data()+(offset_end/64), offset_end%64); + size_type res = 0; + while (res < data.size() and i > data[res]) + ++res; + return res; + } + + /* + static std::array + rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, const std::array& ij, size_type) + { + auto offset = block_start[block_id]; + auto data = decode(bv.data()+(offset/64), offset%64); + std::array res = {0,0}; + while ( res[0] < data.size() and ij[0] > data[res[0]] ) + ++res[0]; + res[1] = res[0]; + while ( res[1] < data.size() and ij[1] > data[res[1]] ) + ++res[1]; + return res; + } + */ +}; + + + +template +class hyb_sd_block_full +{ + public: + typedef bit_vector::size_type size_type; + typedef size_type value_type; + + public: + static size_type estimate_size(size_type u) + { + return (t_block_size == u) ? 0 : std::numeric_limits::max(); + } + + static size_type + serialize(bit_vector&, size_type, int_vector<64>&, size_type u) + { + if (t_block_size != u) { + std::cerr << "this should not happen!" << std::endl; + } + return 0; + } + + static size_type select_1(const bit_vector&, size_type, size_type i, size_type) + { + return i; + } + + static size_type rank_1(const bit_vector&, const int_vector<>&, size_type, size_type i, size_type) + { + return std::min(t_block_size, i); + } +}; + +template +class hyb_sd_block_ef +{ + public: + typedef bit_vector::size_type size_type; + typedef size_type value_type; + + private: + // TODO factor out calculation of logu and logm + + public: + static size_type estimate_size(size_type u) + { + uint8_t logu = bits::hi(u) + 1; + uint8_t logm = bits::hi(t_block_size) + 1; // TODO constexpr for hi? 
+ if (logm == logu) + logm--; + size_type width_low = logu - logm; + size_type size_in_bits = width_low * t_block_size + (1ULL << logm) + t_block_size + 1; + return size_in_bits; + } + static size_type + serialize(bit_vector& bv, size_type offset, int_vector<64>& data, size_type u) + { + size_type written_bits = 0; + uint8_t logu = bits::hi(u) + 1; + uint8_t logm = bits::hi(t_block_size) + 1; + if (logm == logu) + logm--; + size_type width_low = logu - logm; + /* write low */ + auto data_ptr = bv.data() + (offset / 64); + uint8_t in_word_offset = offset % 64; + for (size_type i = 0; i < t_block_size; i++) { + uint64_t x = data[i]; + bits::write_int_and_move(data_ptr, x, in_word_offset, width_low); + } + written_bits += width_low * t_block_size; + + /* write high */ + size_type last_high = 0; + for (size_type i = 0; i < t_block_size; i++) { + uint64_t x = data[i]; + size_type cur_high = x >> width_low; + size_type write_val = cur_high - last_high; + while (write_val >= 64) { + bits::write_int_and_move(data_ptr, 0ULL, in_word_offset, 64); + write_val -= 64; + written_bits += 64; + } + bits::write_int_and_move(data_ptr, 1ULL << write_val, in_word_offset, write_val + 1); + last_high = cur_high; + written_bits += write_val + 1; + } + bv[offset+written_bits] = 0; + ++written_bits; + return written_bits; + } + + static size_type select_1(const bit_vector& bv, size_type offset, size_type i, size_type u) + { + uint8_t logu = bits::hi(u) + 1; + uint8_t logm = bits::hi(t_block_size) + 1; + if (logm == logu) + logm--; + size_type width_low = logu - logm; + size_type hi_part_offset = offset + t_block_size * width_low; + size_type low_part_offset = offset + i * width_low; + + auto low_part_data_ptr = bv.data() + (low_part_offset / 64); + uint8_t low_part_in_word_offset = low_part_offset % 64; + auto low_part = bits::read_int(low_part_data_ptr, low_part_in_word_offset, width_low); + + auto bucket = sel(bv.data(), hi_part_offset, i + 1) - hi_part_offset - i; + return (bucket << 
width_low) | low_part; + } + + static size_type rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, size_type i, size_type u) + { +//std::cout<<">>>>>>>>rank_1("<> width_low); + size_type zeros_in_high = hi_size - t_block_size; + if (zeros_in_high < high_val+1) { + return t_block_size; + } + size_type local_sel = sel0(bv.data(), hi_part_offset, high_val + 1); + + size_type sel_high = local_sel; + size_type rank_low = sel_high - high_val; + if (0 == rank_low) { + return 0; + } + + size_type low_part_offset = offset + rank_low * width_low; + size_type val_low = i & bits::lo_set[width_low]; + auto low_part_data_ptr = bv.data() + (low_part_offset / 64); + uint8_t low_part_in_word_offset = low_part_offset % 64; + +//std::cout<<"_sel_high="<>sel_high "<(m_block_type[block_id]); + size_type block_offset = m_block_start[block_id]; + + switch (block_type) { + case hyb_sd_blocktype::BV: + res += hyb_sd_block_bv::select_1(m_bottom, block_offset, in_block_offset, u); + break; + case hyb_sd_blocktype::EF: + res += hyb_sd_block_ef::select_1(m_bottom, block_offset, in_block_offset, u); + break; + case hyb_sd_blocktype::FULL: + res += hyb_sd_block_full::select_1(m_bottom, block_offset, in_block_offset, u); + break; + case hyb_sd_blocktype::RL: + res += hyb_sd_block_rl::select_1(m_bottom, m_block_start, block_id, in_block_offset, u); + break; + } + return res; + } + + size_type rank_1(size_type i) const + { +// bool debug = false; +// if ( i==2075 or i==2076) { +// debug = true; +// } +//std::cout<<"!!! rank_1("< m_size or m_num_ones == 0) { +//std::cout<<"!!! i > m_size "< "<(m_block_type[block_id]); +//if (debug) { +// std::cout<<"!!! 
i="<::rank_1(m_bottom, m_block_start, block_id, in_block_i, 0); + } + + auto u = m_top_sel(block_id + 2) - top_value; +// auto bt = determine_block_type(u); +// auto block_type = bt.first; +// size_type block_offset = m_block_start[block_id]; + + switch (block_type) { + case hyb_sd_blocktype::BV: +// std::cout << "!!!BV" << std::endl; + res += hyb_sd_block_bv::rank_1(m_bottom, m_block_start, block_id, in_block_i, u); + break; + case hyb_sd_blocktype::EF: +//if (debug){ +// std::cout << "!!!single EF in_block_i="<::rank_1(m_bottom, m_block_start, block_id, in_block_i, u); + break; + case hyb_sd_blocktype::FULL: +// std::cout << "!!!FULL" << std::endl; + res += hyb_sd_block_full::rank_1(m_bottom, m_block_start, block_id, in_block_i, u); + break; + case hyb_sd_blocktype::RL: +// std::cout << "!!!RL" << std::endl; + res += hyb_sd_block_rl::rank_1(m_bottom, m_block_start, block_id, in_block_i, u); + break; + } +//if (debug) { +// std::cout<<"!!!! res="< + rank_1(std::array ij) const + { + if (ij[0] > ij[1]) { + return {rank_1(ij[0]),rank_1(ij[1])}; + } + // no we know ij[0] <= ij[1] + if (ij[0] > m_size or m_num_ones == 0) { + return {m_num_ones, m_num_ones}; + } + if (ij[1] > m_size or m_num_ones == 0) { + return {rank_1(ij[0]), m_num_ones}; + } + auto block_id = m_top_rank(ij[0]); + if (block_id == 0) { + size_type first_element = m_top_sel(1); + if (ij[1] <= first_element) { + return {{0,0}}; + } + return {0, rank_1(ij[1])}; // TODO: can still be optimized + } + block_id -= 1; + size_type r = block_id * t_block_size; + auto top_value = m_top_sel(block_id + 1); + size_type in_block_i = ij[0]; + in_block_i -= top_value; + size_type in_block_j = ij[1]; + in_block_j -= top_value; + + if (in_block_i == 0) { + if (ij[0]==ij[1]) { + return {r,r}; + } + return {r, rank_1(ij[1])}; // TODO: can still be optimized + } + + auto block_type = static_cast(m_block_type[block_id]); + if (block_type == hyb_sd_blocktype::FULL and in_block_j < t_block_size) { + return 
{r+hyb_sd_block_full::rank_1(m_bottom, m_block_start, block_id, in_block_i, 0), + r+hyb_sd_block_full::rank_1(m_bottom, m_block_start, block_id, in_block_j, 0) + }; + } + + auto u = m_top_sel(block_id + 2) - top_value; +// auto bt = determine_block_type(u); +// auto block_type = bt.first; +// size_type block_offset = m_block_start[block_id]; + std::array res {{r,r}}; + + switch (block_type) { + case hyb_sd_blocktype::BV: +// std::cout<<"double rank_1 for BV"<= u) { + res[0] += hyb_sd_block_bv::rank_1(m_bottom, m_block_start, block_id, in_block_i, u); + } else { + auto in_block_rank = hyb_sd_block_bv::rank_1(m_bottom, m_block_start, block_id, {in_block_i, in_block_j}, u); + res[0] += in_block_rank[0]; + res[1] += in_block_rank[1]; + } + break; + case hyb_sd_blocktype::EF: +// std::cout<<"double rank_1 for EF"<= u) { + res[0] += hyb_sd_block_ef::rank_1(m_bottom, m_block_start, block_id, in_block_i, u); + } else { +// std::cout<<"call double"<::rank_1(m_bottom, m_block_start, block_id, {in_block_i, in_block_j}, u); + res[0] += in_block_rank[0]; + res[1] += in_block_rank[1]; + } + break; + case hyb_sd_blocktype::FULL: +// std::cout<<"double rank_1 for FULL"<::rank_1(m_bottom, m_block_start, block_id, in_block_i, u); + if (in_block_j < u) { + res[1] += hyb_sd_block_full::rank_1(m_bottom, m_block_start, block_id, in_block_j, u); + } + break; + case hyb_sd_blocktype::RL: +// std::cout<<"double rank_1 for RL"<= u) { + res[0] += hyb_sd_block_rl::rank_1(m_bottom, m_block_start, block_id, in_block_i, u); + } else { + auto in_block_rank = hyb_sd_block_rl::rank_1(m_bottom, m_block_start, block_id, {in_block_i, in_block_j}, u); + res[0] += in_block_rank[0]; + res[1] += in_block_rank[1]; + } + break; + } + if (in_block_j >= u) { + res[1] = rank_1(ij[1]); + } + return res; + } + + //! Get the integer value of the binary string of length len starting at position idx. 
+ uint64_t get_int(size_type idx, const uint8_t len = 64) const + { + uint64_t x = 0ULL; + for (size_t i=0; i((*this)[idx+i])) << i; + } + return x; + } + + //! Returns the size of the original bit vector. + size_type size() const + { + return m_size; + } + + //! Serializes the data structure into the given ostream + size_type serialize(std::ostream& out, structure_tree_node* v = nullptr, std::string name = "") const + { + structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += write_member(m_num_ones, out, child, "num_ones"); + written_bytes += m_top.serialize(out, child, "top"); + written_bytes += m_top_sel.serialize(out, child, "top_sel"); + written_bytes += m_top_rank.serialize(out, child, "top_rank"); + //written_bytes += m_bottom.serialize(out, nullptr, "bottom"); + auto bottom_bytes = m_bottom.serialize(out, nullptr, "bottom"); + { + structure_tree_node* bottom_child = structure_tree::add_child(child, "bottom", util::class_name(m_bottom)); + std::array written_bits = {{0,0,0,0}}; + for (size_t i=1; i names = {"EF","BV","FULL","RL"}; + for (size_t i=0; i> +class select_support_hyb_sd +{ + public: + typedef typename hyb_bv_type::size_type size_type; + typedef hyb_bv_type bit_vector_type; + enum { bit_pat = t_b }; + enum { bit_pat_len = (uint8_t)1 }; + static constexpr uint16_t block_size = hyb_bv_type::block_size; + + private: + const hyb_bv_type* m_v; + + public: + explicit select_support_hyb_sd(const hyb_bv_type* v = nullptr) + { + set_vector(v); + } + + size_type select(size_type i) const + { + return m_v->select_1(i); + } + + size_type operator()(size_type i) const + { + return select(i); + } + + size_type size() const + { + return m_v->size(); + } + + void set_vector(const hyb_bv_type* v = nullptr) + { + m_v = v; + } + + select_support_hyb_sd& operator=(const select_support_hyb_sd& ss) + { + if (this != &ss) { + 
set_vector(ss.m_v); + } + return *this; + } + + void load(std::istream&, const hyb_bv_type* v = nullptr) + { + set_vector(v); + } + + size_type serialize(std::ostream& out, structure_tree_node* v = nullptr, std::string name = "") const + { + return serialize_empty_object(out, v, name, this); + } +}; + +//! Rank data structure for hyb_sd_vector +template > +class rank_support_hyb_sd +{ + public: + typedef typename hyb_bv_type::size_type size_type; + typedef hyb_bv_type bit_vector_type; + enum { bit_pat = t_b }; + enum { bit_pat_len = (uint8_t)1 }; + static constexpr uint16_t block_size = hyb_bv_type::block_size; + + private: + const hyb_bv_type* m_v; + + public: + explicit rank_support_hyb_sd(const hyb_bv_type* v = nullptr) + { + set_vector(v); + } + + template + t_pos rank(t_pos i) const + { + return m_v->rank_1(i); + } + + template + t_pos operator()(t_pos i) const + { + return rank(i); + } + + size_type size() const + { + return m_v->size(); + } + + void set_vector(const hyb_bv_type* v = nullptr) + { + m_v = v; + } + + rank_support_hyb_sd& operator=(const rank_support_hyb_sd& ss) + { + if (this != &ss) { + set_vector(ss.m_v); + } + return *this; + } + + void load(std::istream&, const hyb_bv_type* v = nullptr) + { + set_vector(v); + } + + size_type serialize(std::ostream& out, structure_tree_node* v = nullptr, std::string name = "") const + { + return serialize_empty_object(out, v, name, this); + } +}; + +} // end namespace +#endif diff --git a/include/sdsl/structure_tree.hpp b/include/sdsl/structure_tree.hpp index ecf3dc303..74c78bf9d 100644 --- a/include/sdsl/structure_tree.hpp +++ b/include/sdsl/structure_tree.hpp @@ -121,6 +121,7 @@ inline std::string create_html_header(const char* file_name) << " \n" << " \n" << " " << file_name << "\n" + << " " << " \n" << "