diff --git a/include/sdsl/bit_vectors.hpp b/include/sdsl/bit_vectors.hpp
index b3b2b7174..fe35a2390 100644
--- a/include/sdsl/bit_vectors.hpp
+++ b/include/sdsl/bit_vectors.hpp
@@ -13,5 +13,6 @@
 #include "rrr_vector.hpp"
 #include "sd_vector.hpp"
 #include "hyb_vector.hpp"
+#include "hyb_sd_vector.hpp"
 
 #endif
diff --git a/include/sdsl/bits.hpp b/include/sdsl/bits.hpp
index 4e152faa3..94caa6cb2 100644
--- a/include/sdsl/bits.hpp
+++ b/include/sdsl/bits.hpp
@@ -11,12 +11,15 @@
 #include <stdint.h> // for uint64_t uint32_t declaration
 #include <iostream> // for cerr
 #include <cassert>
-#ifdef __SSE4_2__
+
+// clang-format off
+#if 1
 #include <xmmintrin.h>
 #endif
-#ifdef __BMI2__
+#if 0
 #include <x86intrin.h>
 #endif
+// clang-format on
 
 #ifdef WIN32
 #include "iso646.h"
@@ -490,9 +493,11 @@ struct bits_impl {
 template <typename T>
 inline uint64_t bits_impl<T>::cnt(uint64_t x)
 {
-#ifdef __SSE4_2__
+// clang-format off
+#if 1
 	return __builtin_popcountll(x);
 #else
+// clang-format on
 #ifdef POPCOUNT_TL
 	return lt_cnt[x & 0xFFULL] + lt_cnt[(x >> 8) & 0xFFULL] + lt_cnt[(x >> 16) & 0xFFULL] +
 		   lt_cnt[(x >> 24) & 0xFFULL] + lt_cnt[(x >> 32) & 0xFFULL] + lt_cnt[(x >> 40) & 0xFFULL] +
@@ -579,11 +584,13 @@ inline uint64_t bits_impl<T>::map01(uint64_t x, uint64_t c)
 template <typename T>
 inline uint32_t bits_impl<T>::sel(uint64_t x, uint32_t i)
 {
-#ifdef __BMI2__
+// clang-format off
+#if 0
 	// taken from folly
 	return _tzcnt_u64(_pdep_u64(1ULL << (i - 1), x));
 #endif
-#ifdef __SSE4_2__
+#if 1
+	// clang-format on
 	uint64_t s = x, b;
 	s		   = s - ((s >> 1) & 0x5555555555555555ULL);
 	s		   = (s & 0x3333333333333333ULL) + ((s >> 2) & 0x3333333333333333ULL);
@@ -653,10 +660,12 @@ inline uint32_t bits_impl<T>::_sel(uint64_t x, uint32_t i)
 template <typename T>
 inline uint32_t bits_impl<T>::hi(uint64_t x)
 {
-#ifdef __SSE4_2__
+// clang-format off
+#if 1
 	if (x == 0) return 0;
 	return 63 - __builtin_clzll(x);
 #else
+	// clang-format on
 	uint64_t t, tt;			  // temporaries
 	if ((tt = x >> 32)) {	 // hi >= 32
 		if ((t = tt >> 16)) { // hi >= 48
@@ -679,10 +688,12 @@ inline uint32_t bits_impl<T>::hi(uint64_t x)
 template <typename T>
 inline uint32_t bits_impl<T>::lo(uint64_t x)
 {
-#ifdef __SSE4_2__
+// clang-format off
+#if 1
 	if (x == 0) return 0;
 	return __builtin_ctzll(x);
 #else
+	// clang-format on
 	if (x & 1) return 0;
 	if (x & 3) return 1;
 	if (x & 7) return 2;
diff --git a/include/sdsl/coder_elias_delta.hpp b/include/sdsl/coder_elias_delta.hpp
index c6cbedf34..7f28d2144 100644
--- a/include/sdsl/coder_elias_delta.hpp
+++ b/include/sdsl/coder_elias_delta.hpp
@@ -140,6 +140,18 @@ class elias_delta {
 	{
 		return v.m_data;
 	}
+
+    //! Decodes one Elias-delta code via the bits::read_*_and_move readers, which get z and offset by reference.
+    static uint64_t decode(const uint64_t*& z, uint8_t& offset)
+    {
+        const size_type len_1_len = bits::read_unary_and_move(z, offset); // read length of length of x
+        if (!len_1_len) return 1ULL; // an empty length field yields the value 1
+        const size_type len = bits::read_int_and_move(z, offset, len_1_len) + (1ULL << len_1_len);
+        // A 64-bit shift by 64 is undefined, so the shift is only evaluated when len-1 < 64.
+        const uint64_t top_bit = (len - 1 < 64) ? (1ULL << (len - 1)) : 0ULL;
+        return bits::read_int_and_move(z, offset, len - 1) + top_bit;
+    }
+
 };
 
 // \sa coder::elias_delta::encoding_length
diff --git a/include/sdsl/config.hpp b/include/sdsl/config.hpp
index bd45b9c87..c2a59fda8 100644
--- a/include/sdsl/config.hpp
+++ b/include/sdsl/config.hpp
@@ -124,6 +124,31 @@ using key_text_trait = key_text_trait_impl<width,void>;
 template<uint8_t width>
 using key_bwt_trait = key_bwt_trait_impl<width,void>;
 
+//! Primary template: yields conf::KEY_TEXT_INT for every width
+//! that has no explicit specialization.
+template<uint8_t t_width>
+constexpr const char* key_text()
+{ return conf::KEY_TEXT_INT; }
+
+//! Primary template: yields conf::KEY_BWT_INT for every width
+//! that has no explicit specialization.
+template<uint8_t t_width>
+constexpr const char* key_bwt()
+{ return conf::KEY_BWT_INT; }
+
+
+//! Specialization for width 8: yields conf::KEY_TEXT
+//! instead of conf::KEY_TEXT_INT.
+template<>
+inline constexpr const char* key_text<8>()
+{ return conf::KEY_TEXT; }
+
+//! Specialization for width 8: yields conf::KEY_BWT
+//! instead of conf::KEY_BWT_INT.
+template<>
+inline constexpr const char* key_bwt<8>()
+{ return conf::KEY_BWT; }
+
 }
 
 #endif
diff --git a/include/sdsl/construct_sa.hpp b/include/sdsl/construct_sa.hpp
index fde397d5f..1b57538b9 100644
--- a/include/sdsl/construct_sa.hpp
+++ b/include/sdsl/construct_sa.hpp
@@ -11,6 +11,7 @@
 
 #include "config.hpp"
 #include "int_vector.hpp"
+#include "bits.hpp"
 
 #include "divsufsort.h"
 #include "divsufsort64.h"
diff --git a/include/sdsl/csa_alphabet_strategy.hpp b/include/sdsl/csa_alphabet_strategy.hpp
index e3eb42369..b571a8ae1 100644
--- a/include/sdsl/csa_alphabet_strategy.hpp
+++ b/include/sdsl/csa_alphabet_strategy.hpp
@@ -55,37 +55,20 @@ template <class bit_vector_type   = bit_vector,
 		  class C_array_type = int_vector<>>
 class succinct_byte_alphabet;
 
-template <class bit_vector_type		= sd_vector<>,
-		  class rank_support_type   = typename bit_vector_type::rank_1_type,
-		  class select_support_type = typename bit_vector_type::select_1_type,
-		  class C_array_type		= int_vector<>>
+template<uint8_t t_q                 = 3,
+         typename bit_vector_type     = bit_vector,
+         typename rank_support_type   = rank_support_scan<>,
+         typename select_support_type = select_support_scan<>
+         >
+class succinct_multibyte_alphabet;
+
+template<class bit_vector_type     = sd_vector<>,
+         class rank_support_type   = typename bit_vector_type::rank_1_type,
+         class select_support_type = typename bit_vector_type::select_1_type,
+         class C_array_type        = int_vector<>
+         >
 class int_alphabet;
 
-template<uint8_t int_width>
-constexpr const char* key_text()
-{
-    return conf::KEY_TEXT_INT;
-}
-
-template<uint8_t int_width>
-constexpr const char* key_bwt()
-{
-    return conf::KEY_BWT_INT;
-}
-
-
-template<>
-inline constexpr const char* key_text<8>()
-{
-    return conf::KEY_TEXT;
-}
-
-template<>
-inline constexpr const char* key_bwt<8>()
-{
-    return conf::KEY_BWT;
-}
-
 template <class t_alphabet_strategy>
 struct alphabet_trait {
 	typedef byte_alphabet type;
@@ -253,6 +236,73 @@ class byte_alphabet {
 };
 
 
+//! Helper class for the char2comp mapping
+template<typename t_alphabet_strat>
+class char2comp_wrapper
+{
+    private:
+        const t_alphabet_strat* m_strat; // strategy whose m_char / m_char_rank are queried
+    public:
+        using comp_char_type = typename t_alphabet_strat::comp_char_type;
+        using char_type = typename t_alphabet_strat::char_type;
+        using size_type = typename t_alphabet_strat::size_type;
+
+        char2comp_wrapper(const t_alphabet_strat* strat) : m_strat(strat) {}
+
+        //! Returns 0 if c lies outside m_char or its bit is unset, otherwise m_char_rank(c).
+        comp_char_type operator[](char_type c) const
+        {
+            if (c >= m_strat->m_char.size() or !m_strat->m_char[c])
+                return (comp_char_type)0;
+            return (comp_char_type) m_strat->m_char_rank((size_type)c);
+        }
+
+        //! Code of a group of q characters: x = x*sigma + m_char_rank(c[i]) for i = 0..q-1 (first character
+        //! most significant). Returns 0 as soon as one c[i] is not marked in m_char. Enabled only for q > 1.
+        template<typename t_strat = t_alphabet_strat>
+        typename std::enable_if<(t_strat::q>1), typename t_strat::multi_comp_char_type>::type
+        operator[](const typename std::enable_if<(t_strat::q>1), typename t_strat::multi_char_type>::type& c) const
+        {
+            uint64_t x = 0; // accumulated as a plain integer, converted to the return type on return
+            for (size_t i=0; i < t_alphabet_strat::q; ++i) {
+                if (c[i] >= m_strat->m_char.size() or !m_strat->m_char[c[i]])
+                    return 0ULL;
+                x = x * m_strat->sigma + m_strat->m_char_rank((size_type)c[i]);
+            }
+            return x;
+        }
+};
+
+//! Helper class for the comp2char mapping
+template<typename t_alphabet_strat>
+class comp2char_wrapper
+{
+    private:
+        const t_alphabet_strat* m_strat; // strategy whose m_char_select is queried
+    public:
+        using char_type = typename t_alphabet_strat::char_type;
+        using comp_char_type = typename t_alphabet_strat::comp_char_type;
+        using size_type = typename t_alphabet_strat::size_type;
+        comp2char_wrapper(const t_alphabet_strat* strat) : m_strat(strat) {}
+        //! Returns m_char_select(c+1) of the strategy, cast to char_type.
+        char_type operator[](comp_char_type c) const
+        { return (char_type) m_strat->m_char_select(((size_type)c)+1); }
+
+        //! Splits c into q base-sigma digits (last character = least significant digit) and maps each digit back.
+        template<typename t_strat = t_alphabet_strat>
+        typename std::enable_if<(t_strat::q>1), typename t_strat::multi_char_type>::type
+        operator[](typename std::enable_if<(t_strat::q>1), typename t_strat::multi_comp_char_type>::type c) const
+        {
+            typename t_strat::multi_char_type res;
+            for (uint64_t x = static_cast<uint64_t>(c), i = t_strat::q; i > 0; --i, x /= m_strat->sigma)
+                res[i-1] = (char_type) m_strat->m_char_select((size_type)(x % m_strat->sigma) + 1);
+            return res;
+        }
+};
+
+
+
+
 //! A space-efficient representation for byte alphabets.
 /*!
  *  The mapping `char2comp` and its inverse `comp2char` is realized internally
@@ -263,218 +313,475 @@ class byte_alphabet {
  *  The types to represent `char2comp`, `comp2char`, and `C` can be specified
  *  by template parameters.
  */
-template <class bit_vector_type,
-		  class rank_support_type,
-		  class select_support_type,
-		  class C_array_type>
-class succinct_byte_alphabet {
-public:
-	class char2comp_wrapper;
-	class comp2char_wrapper;
-	friend class char2comp_wrapper;
-	friend class comp2char_wrapper;
-
-	typedef int_vector<>::size_type size_type;
-	typedef char2comp_wrapper		char2comp_type;
-	typedef comp2char_wrapper		comp2char_type;
-	typedef C_array_type			C_type;
-	typedef uint16_t				sigma_type;
-	typedef uint8_t					char_type;
-	typedef uint8_t					comp_char_type;
-	typedef std::string				string_type;
-	typedef byte_alphabet_tag		alphabet_category;
-	enum { int_width = 8 };
-
-	//! Helper class for the char2comp mapping
-	class char2comp_wrapper {
-	private:
-		const succinct_byte_alphabet* m_strat;
-
-	public:
-		char2comp_wrapper(const succinct_byte_alphabet* strat) : m_strat(strat) {}
-		comp_char_type operator[](char_type c) const
-		{
-			if (c >= m_strat->m_char.size() or !m_strat->m_char[c]) return (comp_char_type)0;
-			return (comp_char_type)m_strat->m_char_rank((size_type)c);
-		}
-	};
-
-	//! Helper class for the comp2char mapping
-	class comp2char_wrapper {
-	private:
-		const succinct_byte_alphabet* m_strat;
-
-	public:
-		comp2char_wrapper(const succinct_byte_alphabet* strat) : m_strat(strat) {}
-		char_type operator[](comp_char_type c) const
-		{
-			return (char_type)m_strat->m_char_select(((size_type)c) + 1);
-		}
-	};
-
-	const char2comp_type char2comp;
-	const comp2char_type comp2char;
-	const C_type&		 C;
-	const sigma_type&	sigma;
-
-private:
-	bit_vector_type		m_char; // `m_char[i]` indicates if character with code i is present or not
-	rank_support_type   m_char_rank;   // rank data structure for `m_char` to answer char2comp
-	select_support_type m_char_select; // select data structure for `m_char` to answer comp2char
-	C_type				m_C;		   // cumulative counts for the compact alphabet [0..sigma]
-	sigma_type			m_sigma;	   // effective size of the alphabet
-
-public:
-	//! Default constructor
-	succinct_byte_alphabet() : char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), m_sigma(0)
-	{
-	}
+template<class bit_vector_type, class rank_support_type, class select_support_type, class C_array_type>
+class succinct_byte_alphabet
+{
+    public:
+        static constexpr uint8_t q = 1;
+
+        friend class char2comp_wrapper<succinct_byte_alphabet>;
+        friend class comp2char_wrapper<succinct_byte_alphabet>;
+        typedef char2comp_wrapper<succinct_byte_alphabet> char2comp_type;
+        typedef comp2char_wrapper<succinct_byte_alphabet> comp2char_type;
+
+        typedef int_vector<>::size_type size_type;
+        typedef C_array_type            C_type;
+        typedef uint16_t                sigma_type;
+        typedef sigma_type              multi_sigma_type;
+        typedef uint8_t                 char_type;
+        typedef uint8_t                 comp_char_type;
+        typedef std::string             string_type;
+        typedef byte_alphabet_tag       alphabet_category;
+        enum { int_width = 8 };
+
+    private:
+        bit_vector_type     m_char;        // `m_char[i]` indicates if character with code i is present or not
+        rank_support_type   m_char_rank;   // rank data structure for `m_char` to answer char2comp
+        select_support_type m_char_select; // select data structure for `m_char` to answer comp2char
+        C_type              m_C;            // cumulative counts for the compact alphabet [0..sigma]
+        sigma_type          m_sigma;       // effective size of the alphabet
+
+        void copy(const succinct_byte_alphabet& strat) // member-wise copy used by copy ctor and copy assignment
+        {
+            m_sigma       = strat.m_sigma;
+            m_C           = strat.m_C;
+            m_char        = strat.m_char;
+            m_char_rank   = strat.m_char_rank;
+            m_char_select = strat.m_char_select;
+            m_char_rank.set_vector(&m_char);   // point both copied supports at
+            m_char_select.set_vector(&m_char); // this object's own m_char
+        }
+    public:
+
+        const char2comp_type    char2comp;
+        const comp2char_type    comp2char;
+        const C_type&           C;
+        const sigma_type&       sigma;
+        const multi_sigma_type& sigma_q;
+
+        //! Default constructor: binds the public accessors to the private members and sets m_sigma to 0.
+        succinct_byte_alphabet()
+            : char2comp(this), comp2char(this), C(m_C),
+              sigma(m_sigma), sigma_q(m_sigma)
+        { m_sigma = 0; }
 
-	//! Construct from a byte-stream
-	/*!
+        //! Construct from a byte-stream
+        /*!
          *  \param text_buf Byte stream.
          *  \param len      Length of the byte stream.
          */
-	succinct_byte_alphabet(int_vector_buffer<8>& text_buf, int_vector_size_type len)
-		: char2comp(this), comp2char(this), C(m_C), sigma(m_sigma)
-	{
-		m_sigma = 0;
-		if (0 == len or 0 == text_buf.size()) return;
-		assert(len <= text_buf.size());
-		// initialize vectors
-		int_vector<64> D(257, 0);
-		bit_vector	 tmp_char(256, 0);
-		// count occurrences of each symbol
-		for (size_type i = 0; i < len; ++i) {
-			++D[text_buf[i]];
-		}
-		assert(1 == D[0]); // null-byte should occur exactly once
-		m_sigma = 0;
-		for (int i = 0; i < 256; ++i)
-			if (D[i]) {
-				tmp_char[i] = 1;	// mark occurring character
-				D[m_sigma]  = D[i]; // compactify m_C
-				++m_sigma;
-			}
-		// resize to sigma+1, since CSAs also need the sum of all elements
-		m_C = C_type(m_sigma + 1, 0, bits::hi(len) + 1);
-
-		for (int i = (int)m_sigma; i > 0; --i)
-			m_C[i] = D[i - 1];
-		m_C[0]	 = 0;
-		for (int i = 1; i <= (int)m_sigma; ++i)
-			m_C[i] = m_C[i] + m_C[i - 1];
-		assert(m_C[sigma] == len);
-		m_char = tmp_char;
-		util::init_support(m_char_rank, &m_char);
-		util::init_support(m_char_select, &m_char);
-	}
+        succinct_byte_alphabet(int_vector_buffer<8>& text_buf, int_vector_size_type len):
+            char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), sigma_q(m_sigma)
+        {
+            m_sigma = 0;
+            if (0 == len or 0 == text_buf.size()) // nothing to count: m_sigma stays 0, the other members stay default-constructed
+                return;
+            assert(len <= text_buf.size());
+            // initialize vectors
+            int_vector<64> D(257, 0); // per-byte occurrence counters
+            bit_vector tmp_char(256, 0); // tmp_char[i] is set iff byte value i occurs
+            // count occurrences of each symbol
+            for (size_type i=0; i < len; ++i) {
+                ++D[text_buf[i]];
+            }
+            assert(1 == D[0]); // null-byte should occur exactly once
+            m_sigma = 0;
+            for (int i=0; i<256; ++i)
+                if (D[i]) {
+                    tmp_char[i] = 1;    // mark occurring character
+                    D[m_sigma] = D[i];  // compact the counts in place; m_sigma <= i, so no unread entry is overwritten
+                    ++m_sigma;
+                }
+            // resize to sigma+1, since CSAs also need the sum of all elements
+            m_C = C_type(m_sigma+1, 0, bits::hi(len)+1);
+            // copy the counts shifted by one position, then turn them into prefix sums
+            for (int i=(int)m_sigma; i > 0; --i) m_C[i] = D[i-1];
+            m_C[0] = 0;
+            for (int i=1; i <= (int)m_sigma; ++i) m_C[i] = m_C[i] + m_C[i-1];
+            assert(m_C[sigma]==len);
+            m_char = tmp_char;
+            util::init_support(m_char_rank, &m_char);
+            util::init_support(m_char_select, &m_char);
+        }
 
-	//! Copy constructor
-	succinct_byte_alphabet(const succinct_byte_alphabet& strat)
-		: char2comp(this)
-		, comp2char(this)
-		, C(m_C)
-		, sigma(m_sigma)
-		, m_char(strat.m_char)
-		, m_char_rank(strat.m_char_rank)
-		, m_char_select(strat.m_char_select)
-		, m_C(strat.m_C)
-		, m_sigma(strat.m_sigma)
-	{
-		m_char_rank.set_vector(&m_char);
-		m_char_select.set_vector(&m_char);
-	}
+        //! Copy constructor: binds the accessors to this object, then delegates to copy().
+        succinct_byte_alphabet(const succinct_byte_alphabet& strat)
+            : char2comp(this), comp2char(this), C(m_C),
+              sigma(m_sigma), sigma_q(m_sigma)
+        { copy(strat); }
 
-	//! Move constructor
-	succinct_byte_alphabet(succinct_byte_alphabet&& strat)
-		: char2comp(this)
-		, comp2char(this)
-		, C(m_C)
-		, sigma(m_sigma)
-		, m_char(std::move(strat.m_char))
-		, m_char_rank(std::move(strat.m_char_rank))
-		, m_char_select(std::move(strat.m_char_select))
-		, m_C(std::move(strat.m_C))
-		, m_sigma(std::move(strat.m_sigma))
-	{
-		m_char_rank.set_vector(&m_char);
-		m_char_select.set_vector(&m_char);
-	}
+        //! Move constructor: reference and wrapper members are bound here, the data is taken by move assignment.
+        succinct_byte_alphabet(succinct_byte_alphabet&& strat)
+            : char2comp(this), comp2char(this), C(m_C),
+              sigma(m_sigma), sigma_q(m_sigma)
+        { *this = std::move(strat); }
 
+        succinct_byte_alphabet& operator=(const succinct_byte_alphabet& strat)
+        {
+            if (this == &strat)
+                return *this; // self-assignment: nothing to copy
+            copy(strat);
+            return *this;
+        }
 
-	succinct_byte_alphabet& operator=(const succinct_byte_alphabet& strat)
-	{
-		if (this != &strat) {
-			succinct_byte_alphabet tmp(strat);
-			*this = std::move(tmp);
-		}
-		return *this;
-	}
+        succinct_byte_alphabet& operator=(succinct_byte_alphabet&& strat)
+        {
+            if (this == &strat)
+                return *this; // self-move: leave everything as it is
+            m_sigma       = std::move(strat.m_sigma);
+            m_C           = std::move(strat.m_C);
+            m_char        = std::move(strat.m_char);
+            m_char_rank   = std::move(strat.m_char_rank);
+            m_char_select = std::move(strat.m_char_select);
+            m_char_rank.set_vector(&m_char);   // point both moved supports at
+            m_char_select.set_vector(&m_char); // this object's own m_char
+            return *this;
+        }
 
-	succinct_byte_alphabet& operator=(succinct_byte_alphabet&& strat)
-	{
-		if (this != &strat) {
-			m_char		= std::move(strat.m_char);
-			m_char_rank = std::move(strat.m_char_rank);
-			m_char_rank.set_vector(&m_char);
-			m_char_select = std::move(strat.m_char_select);
-			m_char_select.set_vector(&m_char);
-			m_C		= std::move(strat.m_C);
-			m_sigma = std::move(strat.m_sigma);
-		}
-		return *this;
-	}
+        //! Serialize method: writes m_char, m_char_rank, m_char_select, m_C and m_sigma to `out`
+        //! (in that order) and returns the sum of the byte counts reported by the individual writes.
+        size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const
+        {
+            structure_tree_node* node = structure_tree::add_child(v, name, util::class_name(*this));
+            size_type total = m_char.serialize(out, node, "m_char");
+            total += m_char_rank.serialize(out, node, "m_char_rank");
+            total += m_char_select.serialize(out, node, "m_char_select");
+            total += m_C.serialize(out, node, "m_C");
+            total += write_member(m_sigma, out, node, "m_sigma");
+            structure_tree::add_size(node, total);
+            return total;
+        }
 
-	//! Serialize method
-	size_type
-	serialize(std::ostream& out, structure_tree_node* v = nullptr, std::string name = "") const
-	{
-		structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
-		size_type			 written_bytes = 0;
-		written_bytes += m_char.serialize(out, child, "m_char");
-		written_bytes += m_char_rank.serialize(out, child, "m_char_rank");
-		written_bytes += m_char_select.serialize(out, child, "m_char_select");
-		written_bytes += m_C.serialize(out, child, "m_C");
-		written_bytes += write_member(m_sigma, out, child, "m_sigma");
-		structure_tree::add_size(child, written_bytes);
-		return written_bytes;
-	}
+        //! Load method: reads the members from `in` in the same order in which serialize() writes them.
+        void load(std::istream& in)
+        {
+            m_char.load(in);
+            m_char_rank.load(in);
+            m_char_rank.set_vector(&m_char); // attach the loaded rank support to this object's m_char
+            m_char_select.load(in);
+            m_char_select.set_vector(&m_char); // attach the loaded select support to this object's m_char
+            m_C.load(in);
+            read_member(m_sigma, in);
+        }
+};
 
-	//! Load method
-	void load(std::istream& in)
-	{
-		m_char.load(in);
-		m_char_rank.load(in);
-		m_char_rank.set_vector(&m_char);
-		m_char_select.load(in);
-		m_char_select.set_vector(&m_char);
-		m_C.load(in);
-		read_member(m_sigma, in);
-	}
+class multibyte_tag;
+class multibyte_comp_char;
+
+//! Wraps a uint64_t code so that multi-character symbols have a type of their own.
+class multibyte_comp_char
+{
+    private:
+        uint64_t m_x; // value
+    public:
+        typedef multibyte_tag type;
+
+        // Both helpers update m_x in place and therefore have to be friends.
+        template<typename t_alphabet_strat>
+        friend bool cyclic_insert_hi(typename t_alphabet_strat::multi_comp_char_type&,
+                                     typename t_alphabet_strat::char_type,
+                                     const t_alphabet_strat&);
+
+        // Declared under the name of the function template that is actually defined and accesses m_x.
+        template<typename t_alphabet_strat>
+        friend bool cyclic_insert_low(typename t_alphabet_strat::multi_comp_char_type&,
+                                      typename t_alphabet_strat::char_type,
+                                      const t_alphabet_strat&);
+
+        //! Converting constructor (not explicit): plain integers convert to this type.
+        multibyte_comp_char(uint64_t x) : m_x(x) {}
+
+        //! Explicit read access to the wrapped value.
+        explicit operator uint64_t() const { return m_x; }
+
+        //! Returns a new object holding m_x + add; *this is unchanged.
+        multibyte_comp_char operator+(uint64_t add)const { return multibyte_comp_char(m_x+add); }
 };
 
-template<typename bit_vector_type, typename size_type>
-void init_char_bitvector(bit_vector_type& char_bv, const std::map<size_type, size_type> &D) {
-    // note: the alphabet has at least size 1, so the following is safe:
-    auto  largest_symbol = (--D.end())->first;
-    bit_vector tmp_char(largest_symbol + 1, 0);
-    for (const auto& x : D ) {
-        tmp_char[x.first] = 1;
-    }
-    char_bv = tmp_char;
+//! Divides mc by alphabet.sigma and adds char2comp[c] * alphabet.sigma_q_1.
+//! Returns false, leaving mc untouched, if char2comp maps a non-zero c to 0.
+template<typename t_alphabet_strat>
+bool cyclic_insert_hi(typename t_alphabet_strat::multi_comp_char_type& mc,
+                      typename t_alphabet_strat::char_type c,
+                      const t_alphabet_strat& alphabet)
+{
+    const auto comp = alphabet.char2comp[c];
+    const bool unknown_char = (comp == 0 and c > 0);
+    if (unknown_char) return false;
+    const auto shifted = mc.m_x / alphabet.sigma;
+    mc.m_x = shifted + (comp * alphabet.sigma_q_1);
+    return true;
 }
 
-template<typename t_hi_bit_vector, typename t_select_1, typename t_select_0, typename size_type>
-void init_char_bitvector(sd_vector<t_hi_bit_vector, t_select_1, t_select_0> &char_bv, const std::map<size_type, size_type> &D) {
-    auto  largest_symbol = (--D.end())->first;
-    sd_vector_builder builder(largest_symbol + 1, D.size());
-    for (const auto& x : D ) {
-        builder.set(x.first);
-    }
-    char_bv = std::move(sd_vector<t_hi_bit_vector, t_select_1, t_select_0>(builder));
+//! Sets mc to (mc * sigma + char2comp[c]) % sigma_q; returns false, leaving mc untouched, if a non-zero c maps to 0.
+template<typename t_alphabet_strat>
+bool cyclic_insert_low(typename t_alphabet_strat::multi_comp_char_type& mc,
+                       typename t_alphabet_strat::char_type c,
+                       const t_alphabet_strat& alphabet)
+{
+    auto cc = alphabet.char2comp[c];
+    if (cc == 0 and c > 0) return false; // char2comp reports 0 for a non-zero character
+    mc.m_x = (mc.m_x * alphabet.sigma + cc) % alphabet.sigma_q;
+    return true; // a bool function must not run off its end
 }
 
+//! A space-efficient representation for a multi-byte alphabet strategy.
+/*!
+ *  \tparam t_q Number of consecutive characters combined into one multi-character
+ *               symbol; the constructor counts codes modulo sigma^t_q.
+ *  The mapping `char2comp` and its inverse `comp2char` is realized internally
+ *  by a bitvector of size 256 bits and a rank and a select structure. The rank
+ *  structure is used to calculate `char2comp`; the select structure is used to
+ *  calculate `comp2char`. Array `C` is represented by a bit-compressed
+ *  `int_vector` and `sigma` by a uint16_t.
+ *  The types to represent `char2comp`, `comp2char`, and `C` can be specified
+ *  by template parameters.
+ */
+template<uint8_t t_q,
+         typename bit_vector_type,
+         typename rank_support_type,
+         typename select_support_type
+         >
+class succinct_multibyte_alphabet
+{
+    public:
+        struct multibyte_C;
+        friend class char2comp_wrapper<succinct_multibyte_alphabet>;
+        friend class comp2char_wrapper<succinct_multibyte_alphabet>;
+        typedef char2comp_wrapper<succinct_multibyte_alphabet> char2comp_type;
+        typedef comp2char_wrapper<succinct_multibyte_alphabet> comp2char_type;
+        typedef multibyte_C C_type;
+        static constexpr uint8_t q = t_q;
+
+        typedef int_vector<>::size_type size_type;
+        typedef uint16_t                sigma_type;
+        typedef uint64_t                multi_sigma_type;
+        typedef uint8_t                 char_type;
+        typedef uint8_t                 comp_char_type;
+        typedef std::array<uint8_t,q>   multi_char_type;
+        typedef multibyte_comp_char     multi_comp_char_type;
+        typedef std::string             string_type;
+        typedef byte_alphabet_tag       alphabet_category;
+        enum { int_width = 8 };
+
+        //! Cumulative counts for single characters (C) and for groups of q characters (multi_C).
+        struct multibyte_C { // TODO add proper constructor
+            int_vector<> C;
+            int_vector<> multi_C;
+
+            size_type operator[](comp_char_type c) const
+            {
+                return C[c];
+            }
+
+            //! Lookup in multi_C; chosen when the argument is a multi_comp_char_type.
+            size_type operator[](multi_comp_char_type c) const
+            {
+                return multi_C[static_cast<uint64_t>(c)];
+            }
+
+            //! Serialize method
+            size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const
+            {
+                structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
+                size_type written_bytes = 0;
+                written_bytes += C.serialize(out, child, "C");
+                written_bytes += multi_C.serialize(out, child, "multi_C");
+                structure_tree::add_size(child, written_bytes);
+                return written_bytes;
+            }
+
+            //! Load method
+            void load(std::istream& in)
+            {
+                C.load(in);
+                multi_C.load(in);
+            }
+        };
+
+    private:
+        bit_vector_type     m_char;        // `m_char[i]` indicates if character with code i is present or not
+        rank_support_type   m_char_rank;   // rank data structure for `m_char` to answer char2comp
+        select_support_type m_char_select; // select data structure for `m_char` to answer comp2char
+        C_type              m_C;           // cumulative counts for the compact alphabet [0..sigma]
+        sigma_type          m_sigma;       // effective size of the alphabet
+        multi_sigma_type    m_sigma_q;     // sigma^q
+        multi_sigma_type    m_sigma_q_1;   // sigma^{q-1}
+
+        //! Member-wise copy; both supports are pointed at this object's own m_char.
+        void copy(const succinct_multibyte_alphabet& strat)
+        {
+            m_char        = strat.m_char;
+            m_char_rank   = strat.m_char_rank;
+            m_char_rank.set_vector(&m_char);
+            m_char_select = strat.m_char_select;
+            m_char_select.set_vector(&m_char);
+            m_C           = strat.m_C;
+            m_sigma       = strat.m_sigma;
+            m_sigma_q     = strat.m_sigma_q;   // without these two, sigma_q and sigma_q_1
+            m_sigma_q_1   = strat.m_sigma_q_1; // of a copy would be indeterminate
+        }
+    public:
+
+        const char2comp_type    char2comp;
+        const comp2char_type    comp2char;
+        const C_type&           C;
+        const sigma_type&       sigma;
+        const multi_sigma_type& sigma_q;
+        const multi_sigma_type& sigma_q_1;
+
+        //! Default constructor
+        succinct_multibyte_alphabet() : char2comp(this), comp2char(this),
+            C(m_C), sigma(m_sigma), sigma_q(m_sigma_q),
+            sigma_q_1(m_sigma_q_1)
+        {
+            m_sigma = 0;
+            m_sigma_q = 0;
+            m_sigma_q_1 = 0;
+        }
+
+        //! Construct from a byte-stream
+        /*!
+         *  \param text_buf Byte stream.
+         *  \param len      Length of the byte stream.
+         */
+        succinct_multibyte_alphabet(int_vector_buffer<8>& text_buf, int_vector_size_type len):
+            char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), sigma_q(m_sigma_q),
+            sigma_q_1(m_sigma_q_1)
+        {
+            m_sigma = 0;
+            m_sigma_q = 0;   // keep all three sizes defined on the early return below
+            m_sigma_q_1 = 0;
+            if (0 == len or 0 == text_buf.size())
+                return;
+            assert(len <= text_buf.size());
+            // initialize vectors
+            int_vector<64> D(257, 0);
+            bit_vector tmp_char(256, 0);
+            // count occurrences of each symbol
+            for (size_type i=0; i < len; ++i) {
+                ++D[text_buf[i]];
+            }
+            assert(1 == D[0]); // null-byte should occur exactly once
+            for (int i=0; i<256; ++i)
+                if (D[i]) {
+                    tmp_char[i] = 1;    // mark occurring character
+                    D[m_sigma] = D[i];  // compactify m_C
+                    ++m_sigma;
+                }
+            m_sigma_q = m_sigma;
+            for (uint8_t i=1; i < t_q; ++i) {
+                m_sigma_q *= m_sigma;
+            }
+            m_sigma_q_1 = m_sigma_q/m_sigma;
+            // resize to sigma+1, since CSAs also need the sum of all elements
+            m_C.C       = int_vector<>(m_sigma+1, 0, bits::hi(len)+1);
+            m_C.multi_C = int_vector<>(m_sigma_q+1, 0, bits::hi(len)+1);
+
+            for (int i=(int)m_sigma; i > 0; --i) m_C.C[i] = D[i-1];
+            m_C.C[0] = 0;
+            for (int i=1; i <= (int)m_sigma; ++i) m_C.C[i] = m_C.C[i] + m_C.C[i-1];
+            assert(m_C.C[sigma]==len);
+            m_char = tmp_char;
+            util::init_support(m_char_rank, &m_char);
+            util::init_support(m_char_select, &m_char);
+            if (t_q == 1) {
+                m_C.multi_C = m_C.C;
+            } else if (t_q > 1) {
+                int_vector<64> multi_D(m_sigma_q+1, 0);
+                // count occurrences of each group of q characters; indices wrap around at len
+                uint64_t x = 0;
+                for (size_type i=0; i<q-1; ++i) {
+                    x *= m_sigma;
+                    x += char2comp[text_buf[i%(len)]]; // wrap like the loop below: len may be smaller than q-1
+                }
+                for (size_type i=q-1; i < len+q-1; ++i) {
+                    x *= m_sigma;
+                    x += char2comp[text_buf[i%(len)]];
+                    x %= m_sigma_q;
+                    ++multi_D[x];
+                }
+                for (size_t i=m_sigma_q; i > 0; --i) {
+                    m_C.multi_C[i] = multi_D[i-1];
+                }
+                m_C.multi_C[0] = 0;
+                for (size_t i=1; i <= m_sigma_q; ++i) {
+                    m_C.multi_C[i] = m_C.multi_C[i] + m_C.multi_C[i-1];
+                }
+            }
+        }
+
+        //! Copy constructor
+        succinct_multibyte_alphabet(const succinct_multibyte_alphabet& strat):
+            char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), sigma_q(m_sigma_q),
+            sigma_q_1(m_sigma_q_1)
+        {
+            copy(strat);
+        }
+
+        //! Move constructor
+        succinct_multibyte_alphabet(succinct_multibyte_alphabet&& strat):
+            char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), sigma_q(m_sigma_q),
+            sigma_q_1(m_sigma_q_1) // reference and const wrapper members need an initializer
+        {
+            *this = std::move(strat);
+        }
+
+        succinct_multibyte_alphabet& operator=(const succinct_multibyte_alphabet& strat)
+        {
+            if (this != &strat) {
+                copy(strat);
+            }
+            return *this;
+        }
+
+        succinct_multibyte_alphabet& operator=(succinct_multibyte_alphabet&& strat)
+        {
+            if (this != &strat) {
+                m_char        = std::move(strat.m_char);
+                m_char_rank   = std::move(strat.m_char_rank);
+                m_char_rank.set_vector(&m_char);
+                m_char_select = std::move(strat.m_char_select);
+                m_char_select.set_vector(&m_char);
+                m_C           = std::move(strat.m_C);
+                m_sigma       = std::move(strat.m_sigma);
+                m_sigma_q     = std::move(strat.m_sigma_q);
+                m_sigma_q_1   = std::move(strat.m_sigma_q_1);
+            }
+            return *this;
+        }
+
+        //! Serialize method
+        size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const
+        {
+            structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
+            size_type written_bytes = 0;
+            written_bytes += m_char.serialize(out, child, "m_char");
+            written_bytes += m_char_rank.serialize(out, child, "m_char_rank");
+            written_bytes += m_char_select.serialize(out, child, "m_char_select");
+            written_bytes += m_C.serialize(out, child, "m_C");
+            written_bytes += write_member(m_sigma, out, child, "m_sigma");
+            written_bytes += write_member(m_sigma_q, out, child, "m_sigma_q");
+            written_bytes += write_member(m_sigma_q_1, out, child, "m_sigma_q_1");
+            structure_tree::add_size(child, written_bytes);
+            return written_bytes;
+        }
+
+        //! Load method
+        void load(std::istream& in)
+        {
+            m_char.load(in);
+            m_char_rank.load(in);
+            m_char_rank.set_vector(&m_char);
+            m_char_select.load(in);
+            m_char_select.set_vector(&m_char);
+            m_C.load(in);
+            read_member(m_sigma, in);
+            read_member(m_sigma_q, in);
+            read_member(m_sigma_q_1, in);
+        }
+};
+
 //! A space-efficient representation for byte alphabets.
 /*!
  *  The mapping `char2comp` and its inverse `comp2char` is realized internally
@@ -486,94 +793,109 @@ void init_char_bitvector(sd_vector<t_hi_bit_vector, t_select_1, t_select_0> &cha
  *  The types to represent `char2comp`, `comp2char`, and `C` can be specified
  *  by template parameters.
  */
-template <class bit_vector_type,
-		  class rank_support_type,
-		  class select_support_type,
-		  class C_array_type>
-class int_alphabet {
-public:
-	class char2comp_wrapper;
-	class comp2char_wrapper;
-	friend class char2comp_wrapper;
-	friend class comp2char_wrapper;
+template<class bit_vector_type, class rank_support_type, class select_support_type, class C_array_type>
+class int_alphabet
+{
+    public:
+        class char2comp_wrapper_int;
+        class comp2char_wrapper_int;
+        friend class char2comp_wrapper_int;
+        friend class comp2char_wrapper_int;
+
+        typedef int_vector<>::size_type size_type;
+        typedef char2comp_wrapper_int       char2comp_type;
+        typedef comp2char_wrapper_int       comp2char_type;
+        typedef C_array_type            C_type;
+        typedef uint64_t                sigma_type;
+        typedef uint64_t                char_type;
+        typedef uint64_t                comp_char_type;
+        typedef std::vector<char_type>  string_type;
+        typedef int_alphabet_tag        alphabet_category;
+        enum { int_width = 0 };
+
+        //! Helper class for the char2comp mapping
+        class char2comp_wrapper_int
+        {
+            private:
+                const int_alphabet* m_strat; // alphabet whose members are queried
+            public:
+                char2comp_wrapper_int(const int_alphabet* strat) : m_strat(strat) {}
+                //! Maps symbol c to its compact code; symbols outside the alphabet map to 0.
+                comp_char_type operator[](char_type c) const
+                {
+                    if (m_strat->m_char.size() > 0) {  // if alphabet is not continuous
+                        if (c >= m_strat->m_char.size() or !m_strat->m_char[c])
+                            return (comp_char_type)0;
+                        return (comp_char_type) m_strat->m_char_rank((size_type)c);
+                    }
+                    // direct map if it is continuous
+                    if (c >= m_strat->m_sigma)
+                        return 0;
+                    return (comp_char_type) c;
+                }
+        };
+
+        //! Helper class for the comp2char mapping
+        class comp2char_wrapper_int
+        {
+            private:
+                const int_alphabet* m_strat;
+            public:
+                comp2char_wrapper_int(const int_alphabet* strat) : m_strat(strat) {}
+                char_type operator[](comp_char_type c) const
+                {
+                    // An empty m_char marks a continuous alphabet: codes equal symbols.
+                    if (m_strat->m_char.size() == 0)
+                        return (char_type) c;
+                    // Otherwise query the select structure with the 1-based index c+1.
+                    return (char_type) m_strat->m_char_select(((size_type)c)+1);
+                }
+        };
+
+    private:
+        bit_vector_type     m_char;        // `m_char[i]` indicates if character with code i is present or not
+        rank_support_type   m_char_rank;   // rank data structure for `m_char` to answer char2comp
+        select_support_type m_char_select; // select data structure for `m_char` to answer comp2char
+        C_type              m_C;           // cumulative counts for the compact alphabet [0..sigma]
+        sigma_type          m_sigma;       // effective size of the alphabet
+
+        void copy(const int_alphabet& strat)
+        {
+            m_sigma       = strat.m_sigma;
+            m_C           = strat.m_C;
+            m_char        = strat.m_char;
+            m_char_rank   = strat.m_char_rank;
+            m_char_select = strat.m_char_select;
+            m_char_rank.set_vector(&m_char);   // bind both supports to this object's
+            m_char_select.set_vector(&m_char); // own bit vector
+        }
 
-	typedef int_vector<>::size_type size_type;
-	typedef char2comp_wrapper		char2comp_type;
-	typedef comp2char_wrapper		comp2char_type;
-	typedef C_array_type			C_type;
-	typedef uint64_t				sigma_type;
-	typedef uint64_t				char_type;
-	typedef uint64_t				comp_char_type;
-	typedef std::vector<char_type>  string_type;
-	typedef int_alphabet_tag		alphabet_category;
-	enum { int_width = 0 };
-
-	//! Helper class for the char2comp mapping
-	class char2comp_wrapper {
-	private:
-		const int_alphabet* m_strat;
-
-	public:
-		char2comp_wrapper(const int_alphabet* strat) : m_strat(strat) {}
-		comp_char_type operator[](char_type c) const
-		{
-			if (m_strat->m_char.size() > 0) { // if alphabet is not continuous
-				if (c >= m_strat->m_char.size() or !m_strat->m_char[c]) return (comp_char_type)0;
-				return (comp_char_type)m_strat->m_char_rank((size_type)c);
-			} else { // direct map if it is continuous
-				if (c >= m_strat->m_sigma) return 0;
-				return (comp_char_type)c;
-			}
-			return 0;
-		}
-	};
-
-	//! Helper class for the comp2char mapping
-	class comp2char_wrapper {
-	private:
-		const int_alphabet* m_strat;
-
-	public:
-		comp2char_wrapper(const int_alphabet* strat) : m_strat(strat) {}
-		char_type operator[](comp_char_type c) const
-		{
-			if (m_strat->m_char.size() > 0) { // if alphabet is not continuous
-				return (char_type)m_strat->m_char_select(((size_type)c) + 1);
-			} else { // direct map if it is continuous
-				return (char_type)c;
-			}
-		}
-	};
+        //! Check if the alphabet is continuous.
+        bool is_continuous_alphabet(std::map<size_type, size_type>& D)
+        {
+            // An empty alphabet counts as continuous; otherwise the keys are
+            // continuous iff the largest key plus one equals the number of keys.
+            if (D.empty()) {
+                return true;
+            }
+            return (D.rbegin()->first + 1) == D.size();
+        }
 
-	const char2comp_type char2comp;
-	const comp2char_type comp2char;
-	const C_type&		 C;
-	const sigma_type&	sigma;
+    public:
 
-private:
-	bit_vector_type		m_char; // `m_char[i]` indicates if character with code i is present or not
-	rank_support_type   m_char_rank;   // rank data structure for `m_char` to answer char2comp
-	select_support_type m_char_select; // select data structure for `m_char` to answer comp2char
-	C_type				m_C;		   // cumulative counts for the compact alphabet [0..sigma]
-	sigma_type			m_sigma;	   // effective size of the alphabet
-
-	//! Check if the alphabet is continuous.
-	bool is_continuous_alphabet(std::map<size_type, size_type>& D)
-	{
-		if (D.size() == 0) { // an empty alphabet is continuous
-			return true;
-		} else {
-			//            max key      + 1  ==  size of map
-			return ((--D.end())->first + 1) == D.size();
-		}
-	}
+        const char2comp_type char2comp;
+        const comp2char_type comp2char;
+        const C_type&        C;
+        const sigma_type&    sigma;
 
-public:
-	//! Default constructor
-	int_alphabet() : char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), m_sigma(0) {}
+        //! Default constructor
+        int_alphabet()
+            : m_sigma(0), // starts with an alphabet of size 0
+              char2comp(this), comp2char(this), C(m_C), sigma(m_sigma)
+        {}
 
-	//! Construct from a byte-stream
-	/*!
+        //! Construct from a byte-stream
+        /*!
          *  \param text_buf Byte stream.
          *  \param len      Length of the byte stream.
          */
@@ -593,7 +915,15 @@ class int_alphabet {
 		if (is_continuous_alphabet(D)) {
 			// do not initialize m_char, m_char_rank and m_char_select since we can map directly
 		} else {
-            init_char_bitvector(m_char, D);
+            // note: the alphabet has at least size 1, so the following is safe:
+            size_type largest_symbol = (--D.end())->first;
+            bit_vector tmp_char(largest_symbol+1, 0);
+            for (std::map<size_type, size_type>::const_iterator it = D.begin(), end=D.end(); it != end; ++it) {
+                tmp_char[it->first] = 1;
+            }
+            m_char = tmp_char;
+            util::init_support(m_char_rank, &m_char);
+            util::init_support(m_char_select, &m_char);
 		}
 		assert(D.find(0) != D.end() and 1 == D[0]); // null-byte should occur exactly once
 
diff --git a/include/sdsl/csa_bitcompressed.hpp b/include/sdsl/csa_bitcompressed.hpp
index e69e84401..72c8f8879 100644
--- a/include/sdsl/csa_bitcompressed.hpp
+++ b/include/sdsl/csa_bitcompressed.hpp
@@ -215,12 +215,12 @@ class csa_bitcompressed {
 private:
 	// Calculates how many symbols c are in the prefix [0..i-1] of the BWT of the original text.
 	/*
-         *  \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
-         *  \param c The symbol to count the occurrences in the prefix.
-         *    \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT.
-         *  \par Time complexity
-         *        \f$ \Order{\log n} \f$
-         */
+     *  \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
+     *  \param c The symbol to count the occurrences in the prefix.
+     *    \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT.
+     *  \par Time complexity
+     *        \f$ \Order{\log n} \f$
+     */
 	size_type rank_bwt(size_type i, const char_type c) const
 	{
 		// TODO: special case if c == BWT[i-1] we can use LF to get a constant time answer
@@ -244,15 +244,29 @@ class csa_bitcompressed {
 		}
 	}
 
-	// Calculates the i-th occurrence of symbol c in the BWT of the original text.
-	/*
-         *  \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$.
-         *  \param c Character c.
-         *    \returns The i-th occurrence of c in the BWT or size() if c does
-         *           not occur t times in BWT>
-         *  \par Time complexity
-         *        \f$ \Order{t_{\Psi}} \f$
-         */
+    // Calculates how many symbols c are in the prefixes [0..ij[0]-1] and [0..ij[1]-1] of the BWT of the original text.
+    /* \param ij The exclusive indices of the prefix ranges [0..ij[0]-1] and [0..ij[1]-1]
+     * \param c The symbol to count
+     *  \returns An array of size two which contains the occurrences of symbol c in the prefixes [0..ij[0]-1] and [0..ij[1]-1]
+     * \par Time complexity
+     *    \f$ \Order{\log n} \f$
+     */
+    std::array<size_type,2>
+    rank_bwt(std::array<size_type,2> ij, const char_type c)const
+    {
+        return {rank_bwt(ij[0], c), rank_bwt(ij[1],c)};
+    }
+
+
+    // Calculates the i-th occurrence of symbol c in the BWT of the original text.
+    /*
+     *  \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$.
+     *  \param c Character c.
+     *    \returns The i-th occurrence of c in the BWT or size() if c does
+     *           not occur t times in BWT>
+     *  \par Time complexity
+     *        \f$ \Order{t_{\Psi}} \f$
+     */
 	size_type select_bwt(size_type i, const char_type c) const
 	{
 		comp_char_type cc = char2comp[c];
diff --git a/include/sdsl/csa_sada.hpp b/include/sdsl/csa_sada.hpp
index 38c3aedf8..8451c9656 100644
--- a/include/sdsl/csa_sada.hpp
+++ b/include/sdsl/csa_sada.hpp
@@ -9,6 +9,7 @@
 #define INCLUDED_SDSL_CSA_SADA
 
 #include "enc_vector.hpp"
+#include "enc_vector2.hpp"
 #include "int_vector.hpp"
 #include "iterators.hpp"
 #include "suffix_array_helper.hpp"
@@ -36,130 +37,134 @@ namespace sdsl {
   *  \sa sdsl::csa_wt, sdsl::csa_bitcompressed
   * @ingroup csa
  */
-template <class t_enc_vec		   = enc_vector<>, // Vector type used to store the Psi-function
-		  uint32_t t_dens		   = 32,		   // Sample density for suffix array (SA) values
-		  uint32_t t_inv_dens	  = 64, // Sample density for inverse suffix array (ISA) values
-		  class t_sa_sample_strat  = sa_order_sa_sampling<>, // Policy class for the SA sampling.
-		  class t_isa_sample_strat = isa_sampling<>,		 // Policy class for ISA sampling.
-		  class t_alphabet_strat =
-		  byte_alphabet // Policy class for the representation of the alphabet.
-		  >
-class csa_sada {
-	static_assert(is_enc_vec<t_enc_vec>::value,
-				  "First template argument has to be of type env_vector.");
-	static_assert(t_dens > 0, "Second template argument has to be greater then 0.");
-	static_assert(t_inv_dens > 0, "Third template argument has to be greater then 0.");
-	static_assert(
-	std::is_same<typename sampling_tag<t_sa_sample_strat>::type, sa_sampling_tag>::value,
-	"Forth template argument has to be a suffix array sampling strategy.");
-	static_assert(
-	std::is_same<typename sampling_tag<t_isa_sample_strat>::type, isa_sampling_tag>::value,
-	"Fifth template argument has to be a inverse suffix array sampling strategy.");
-	static_assert(is_alphabet<t_alphabet_strat>::value,
-				  "Sixth template argument has to be a alphabet strategy.");
-
-	friend class bwt_of_csa_psi<csa_sada>;
-
-public:
-	enum { sa_sample_dens = t_dens, isa_sample_dens = t_inv_dens };
-
-	typedef uint64_t							   value_type;
-	typedef random_access_const_iterator<csa_sada> const_iterator;
-	typedef const_iterator						   iterator;
-	typedef const value_type					   const_reference;
-	typedef const_reference						   reference;
-	typedef const_reference*					   pointer;
-	typedef const pointer						   const_pointer;
-	typedef int_vector<>::size_type				   size_type;
-	typedef size_type							   csa_size_type;
-	typedef ptrdiff_t							   difference_type;
-	typedef t_enc_vec							   enc_vector_type;
-	typedef enc_vector_type						   psi_type;
-	typedef traverse_csa_psi<csa_sada, false> lf_type;
-	typedef bwt_of_csa_psi<csa_sada>							 bwt_type;
-	typedef isa_of_csa_psi<csa_sada>							 isa_type;
-	typedef text_of_csa<csa_sada>								 text_type;
-	typedef first_row_of_csa<csa_sada>							 first_row_type;
-	typedef typename t_sa_sample_strat::template type<csa_sada>  sa_sample_type;
-	typedef typename t_isa_sample_strat::template type<csa_sada> isa_sample_type;
-	typedef t_alphabet_strat									 alphabet_type;
-	typedef typename alphabet_type::alphabet_category			 alphabet_category;
-	typedef typename alphabet_type::comp_char_type				 comp_char_type;
-	typedef typename alphabet_type::char_type
-												char_type; // Note: This is the char type of the CSA not the WT!
-	typedef typename alphabet_type::string_type string_type;
-	typedef csa_sada							csa_type;
-
-	typedef csa_tag index_category;
-	typedef psi_tag extract_category;
-
-	friend class traverse_csa_psi<csa_sada, true>;
-	friend class traverse_csa_psi<csa_sada, false>;
-
-	static const uint32_t linear_decode_limit = 100000;
-
-private:
-	enc_vector_type m_psi;		  // psi function
-	sa_sample_type  m_sa_sample;  // suffix array samples
-	isa_sample_type m_isa_sample; // inverse suffix array samples
-	alphabet_type   m_alphabet;   // alphabet component
-
-	mutable std::vector<uint64_t> m_psi_buf; // buffer for decoded psi values
-
-	void create_buffer()
-	{
-		if (enc_vector_type::sample_dens < linear_decode_limit) {
-			m_psi_buf = std::vector<uint64_t>(enc_vector_type::sample_dens + 1);
-		}
-	}
-
-public:
-	const typename alphabet_type::char2comp_type& char2comp  = m_alphabet.char2comp;
-	const typename alphabet_type::comp2char_type& comp2char  = m_alphabet.comp2char;
-	const typename alphabet_type::C_type&		  C			 = m_alphabet.C;
-	const typename alphabet_type::sigma_type&	 sigma		 = m_alphabet.sigma;
-	const psi_type&								  psi		 = m_psi;
-	const lf_type								  lf		 = lf_type(*this);
-	const bwt_type								  bwt		 = bwt_type(*this);
-	const isa_type								  isa		 = isa_type(*this);
-	const bwt_type								  L			 = bwt_type(*this);
-	const first_row_type						  F			 = first_row_type(*this);
-	const text_type								  text		 = text_type(*this);
-	const sa_sample_type&						  sa_sample  = m_sa_sample;
-	const isa_sample_type&						  isa_sample = m_isa_sample;
-
-
-	//! Default Constructor
-	csa_sada() { create_buffer(); }
-	//! Default Destructor
-	~csa_sada() {}
-
-	//! Copy constructor
-	csa_sada(const csa_sada& csa)
-		: m_psi(csa.m_psi)
-		, m_sa_sample(csa.m_sa_sample)
-		, m_isa_sample(csa.m_isa_sample)
-		, m_alphabet(csa.m_alphabet)
-	{
-		create_buffer();
-		m_isa_sample.set_vector(&m_sa_sample);
-	}
-
-	//! Move constructor
-	csa_sada(csa_sada&& csa)
-		: m_psi(std::move(csa.m_psi))
-		, m_sa_sample(std::move(csa.m_sa_sample))
-		, m_isa_sample(std::move(csa.m_isa_sample))
-		, m_alphabet(std::move(csa.m_alphabet))
-	{
-		create_buffer();
-		m_isa_sample.set_vector(&m_sa_sample);
-	}
-
-	csa_sada(cache_config& config);
-
-	//! Number of elements in the \f$\CSA\f$.
-	/*! Required for the Container Concept of the STL.
+template<class t_enc_vec         = enc_vector<>,          // Vector type used to store the Psi-function
+         uint32_t t_dens         = 32,                    // Sample density for suffix array (SA) values
+         uint32_t t_inv_dens     = 64,                    // Sample density for inverse suffix array (ISA) values
+         class t_sa_sample_strat = sa_order_sa_sampling<>,// Policy class for the SA sampling.
+         class t_isa_sample_strat= isa_sampling<>,        // Policy class for ISA sampling.
+         class t_alphabet_strat  = byte_alphabet          // Policy class for the representation of the alphabet.
+         >
+class csa_sada
+{
+        static_assert(is_enc_vec<t_enc_vec>::value,
+                      "First template argument has to be of type env_vector.");
+        static_assert(t_dens > 0,
+                      "Second template argument has to be greater then 0.");
+        static_assert(t_inv_dens > 0,
+                      "Third template argument has to be greater then 0.");
+        static_assert(std::is_same<typename sampling_tag<t_sa_sample_strat>::type, sa_sampling_tag>::value,
+                      "Forth template argument has to be a suffix array sampling strategy.");
+        static_assert(std::is_same<typename sampling_tag<t_isa_sample_strat>::type, isa_sampling_tag>::value,
+                      "Fifth template argument has to be a inverse suffix array sampling strategy.");
+        static_assert(is_alphabet<t_alphabet_strat>::value,
+                      "Sixth template argument has to be a alphabet strategy.");
+
+        friend class bwt_of_csa_psi<csa_sada>;
+    public:
+        enum { sa_sample_dens = t_dens,
+               isa_sample_dens = t_inv_dens
+             };
+
+        typedef uint64_t                                             value_type;
+        typedef random_access_const_iterator<csa_sada>               const_iterator;
+        typedef const_iterator                                       iterator;
+        typedef const value_type                                     const_reference;
+        typedef const_reference                                      reference;
+        typedef const_reference*                                     pointer;
+        typedef const pointer                                        const_pointer;
+        typedef int_vector<>::size_type                              size_type;
+        typedef size_type                                            csa_size_type;
+        typedef ptrdiff_t                                            difference_type;
+        typedef t_enc_vec                                            enc_vector_type;
+        typedef enc_vector_type                                      psi_type;
+        typedef traverse_csa_psi<csa_sada,false>                     lf_type;
+        typedef bwt_of_csa_psi<csa_sada>                             bwt_type;
+        typedef isa_of_csa_psi<csa_sada>                             isa_type;
+        typedef text_of_csa<csa_sada>                                text_type;
+        typedef first_row_of_csa<csa_sada>                           first_row_type;
+        typedef typename t_sa_sample_strat::template type<csa_sada>  sa_sample_type;
+        typedef typename t_isa_sample_strat::template type<csa_sada> isa_sample_type;
+        typedef t_alphabet_strat                                     alphabet_type;
+        typedef typename alphabet_type::alphabet_category            alphabet_category;
+        typedef typename alphabet_type::comp_char_type               comp_char_type;
+        typedef typename alphabet_type::char_type                    char_type; // Note: This is the char type of the CSA not the WT!
+        typedef typename alphabet_type::string_type                  string_type;
+        typedef csa_sada                                             csa_type;
+
+        typedef csa_tag                                              index_category;
+        typedef psi_tag                                              extract_category;
+
+        friend class traverse_csa_psi<csa_sada,true>;
+        friend class traverse_csa_psi<csa_sada,false>;
+
+        static const uint32_t linear_decode_limit = 100000;
+    private:
+        enc_vector_type m_psi;        // psi function
+        sa_sample_type  m_sa_sample;  // suffix array samples
+        isa_sample_type m_isa_sample; // inverse suffix array samples
+        alphabet_type   m_alphabet;   // alphabet component
+
+        mutable std::vector<uint64_t> m_psi_buf; // buffer for decoded psi values
+
+        void copy(const csa_sada& csa) // copies psi, both sample structures and the alphabet
+        {
+            m_psi        = csa.m_psi;
+            m_sa_sample  = csa.m_sa_sample;
+            m_isa_sample = csa.m_isa_sample;
+            m_isa_sample.set_vector(&m_sa_sample); // bind the ISA samples to this object's SA samples
+            m_alphabet   = csa.m_alphabet;
+        }
+
+        void create_buffer()
+        {
+            // The buffer is only set up when the sample density is below linear_decode_limit.
+            if (enc_vector_type::sample_dens < linear_decode_limit)
+                m_psi_buf.assign(enc_vector_type::sample_dens+1, uint64_t(0));
+        }
+
+    public:
+        const typename alphabet_type::char2comp_type& char2comp  = m_alphabet.char2comp;
+        const typename alphabet_type::comp2char_type& comp2char  = m_alphabet.comp2char;
+        const typename alphabet_type::C_type&         C          = m_alphabet.C;
+        const typename alphabet_type::sigma_type&     sigma      = m_alphabet.sigma;
+        const alphabet_type&                          alphabet   = m_alphabet;
+        const psi_type&                               psi        = m_psi;
+        const lf_type                                 lf         = lf_type(*this);
+        const bwt_type                                bwt        = bwt_type(*this);
+        const isa_type                                isa        = isa_type(*this);
+        const bwt_type                                L          = bwt_type(*this);
+        const first_row_type                          F          = first_row_type(*this);
+        const text_type                               text       = text_type(*this);
+        const sa_sample_type&                         sa_sample  = m_sa_sample;
+        const isa_sample_type&                        isa_sample = m_isa_sample;
+
+
+        //! Default constructor: sets up the psi decode buffer via create_buffer().
+        csa_sada()
+        {
+            create_buffer();
+        }
+        //! Default destructor
+        ~csa_sada() { }
+
+        //! Copy constructor: sets up the decode buffer, then copies the members of csa via copy().
+        csa_sada(const csa_sada& csa)
+        {
+            create_buffer();
+            copy(csa);
+        }
+
+        //! Move constructor: takes over the members of csa and (re-)creates the psi decode buffer.
+        csa_sada(csa_sada&& csa) {
+            *this = std::move(csa);
+            create_buffer(); // every other constructor sets up m_psi_buf; rank_comp_bwt decodes into it
+        }
+
+        csa_sada(cache_config& config);
+
+        //! Number of elements in the \f$\CSA\f$.
+        /*! Required for the Container Concept of the STL.
+         *
          *  \sa max_size, empty
          *  \par Time complexity
          *      \f$ \Order{1} \f$
@@ -241,98 +246,217 @@ class csa_sada {
          */
 	void load(std::istream& in);
 
-	uint32_t get_sample_dens() const { return t_dens; }
+    // Calculates how many symbols cc are in the prefix [0..i-1] of the BWT of the original text.
+    /*
+     *  \param i  The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
+     *  \param cc The compactified symbol to count in the prefix.
+     *  \returns The number of occurrences of the compactified symbol cc in the prefix [0..i-1].
+     *  \par Time complexity
+     *        \f$ \Order{\log n t_{\Psi}} \f$
+     *  \par Approach
+     *        Counts the positions of the context [C[cc], C[cc+1]) whose psi value is
+     *        smaller than i. The sampled psi values select one block of sd entries;
+     *        only that block is decoded into m_psi_buf and scanned linearly.
+     *  \note NOTE(review): m_psi_buf stays empty when create_buffer() skips the allocation
+     *        (sample_dens >= linear_decode_limit); this function does not check for that.
+     */
+    template<typename t_char>
+    size_type rank_comp_bwt(size_type i, const t_char cc)const
+    {
+        if (i == 0)
+            return 0;
+        assert(i <= size());
+        const auto cc_begin = C[cc];   // begin of interval of context cc (inclusive)
+        const auto cc_end   = C[cc+1]; // end of interval of context cc (exclusive)
+        const size_type sd  = m_psi.get_sample_dens();
+        size_type s_begin   = (cc_begin+sd-1)/sd; // first sample at or after cc_begin
+        size_type s_end     = (cc_end+sd-1)/sd;   // first sample at or after cc_end
+
+        if (s_begin == s_end) {
+            // Case (1): No sample inside [cc_begin, cc_end)
+            //           => search in previous block (s_begin-1)
+        } else if (m_psi.sample(s_begin) >= i) {  // now s_begin < s_end
+            // Case (2): Some samples inside [cc_begin, cc_end)
+            //           and first sample already larger or equal to i
+            //           => search in previous block (s_begin-1)
+        } else { // still s_begin < s_end
+            // Case (3): Some samples inside [cc_begin, cc_end)
+            //           and first sample smaller than i
+            //           => binary search for first sample >= i
+            s_begin = upper_bound(s_begin, s_end, i-1);
+            //           => search in previous block (s_begin-1)
+        }
+        if (s_begin == 0) {
+            // There is no previous block: cc_begin == 0 and either the context is empty
+            // (case 1) or its first psi value is already >= i (case 2), so nothing counts.
+            // Without this guard the decrement below wraps and sample() is read out of range.
+            return 0;
+        }
+        s_begin -= 1;
+        uint64_t smpl = m_psi.sample(s_begin);
+
+        // The block may start before the context; skip the entries in front of cc_begin.
+        size_t abs_decode_begin = s_begin*sd;
+        size_t skip = 0;
+        if (abs_decode_begin < cc_begin) {
+            skip = cc_begin - abs_decode_begin;
+        }
+        size_t res = abs_decode_begin + skip - cc_begin;
+
+        if ((s_begin+1)*sd < m_psi.size() and skip == 0 and smpl+sd == m_psi.sample(s_begin+1)) {
+            // Shortcut: the block starts inside the context and the next sample is exactly
+            // sd larger than this one. The code treats this as a run of consecutive psi
+            // values, so the count follows arithmetically without decoding the block.
+            return res + (i - smpl);
+        }
+
+        uint64_t* p = m_psi_buf.data();
+        // extract the psi values between two samples
+        m_psi.get_inter_sampled_values(s_begin, p);
+        p = m_psi_buf.data();
+
+        // Linear scan: count entries until the first one that reaches i (buffer entry plus
+        // smpl is compared against i), the end of the context, or the end of the block.
+        for (auto it = p + skip; (res < cc_end - cc_begin) and it < m_psi_buf.data()+sd; ++it) {
+            if ((*it)+smpl >= i)
+                break;
+            ++res;
+        }
+        return res;
+    }
+
+    template<typename t_char>
+    std::tuple<size_type,size_type> double_rank_comp_bwt(size_type i, size_type j, const t_char cc)const
+    {
+        // Computes the ranks of cc for both i and j; j is answered from the block decoded for i when possible.
+        // Intended to match std::make_tuple(rank_comp_bwt(i,cc), rank_comp_bwt(j,cc)); presumably expects i <= j -- TODO confirm.
+        if (i == 0)
+            return std::make_tuple(0, rank_comp_bwt(j,cc));
+        assert(i <= size());
+        const auto cc_begin = C[cc];   // begin of interval of context cc (inclusive)
+        const auto cc_end   = C[cc+1]; // end of interval of context cc (exclusive)
+        const size_type sd  = m_psi.get_sample_dens();
+        size_type s_begin   = (cc_begin+sd)/sd; // first sample after cc_begin (rank_comp_bwt uses "at or after")
+        size_type s_end     = (cc_end+sd-1)/sd;   // first sample at or after cc_end
+        bool answer_j       = false; // true if j can be answered from the same block as i
+
+        if (s_begin == s_end) {
+            // Case (1): No sample inside [cc_begin, cc_end)
+            //           => search in previous block (s_begin-1)
+            answer_j = true;
+        } else if (m_psi.sample(s_begin) >= i) {  // now s_begin < s_end
+            // Case (2): Some samples inside [cc_begin, cc_end)
+            //           and first sample already larger or equal to i
+            //           => search in previous block (s_begin-1)
+            answer_j = (m_psi.sample(s_begin) >= j);
+        } else { // still s_begin < s_end
+            // Case (3): Some samples inside [cc_begin, cc_end)
+            //           and first sample smaller than i
+            //           => binary search for first sample >= i
+            s_begin = upper_bound(s_begin, s_end, i-1);
+            //           => search in previous block (s_begin-1)
+            answer_j = (s_begin == s_end) or (m_psi.sample(s_begin) >=j);
+        }
+        s_begin -= 1;
+        uint64_t smpl = m_psi.sample(s_begin);
+
+        size_t abs_decode_begin = s_begin*sd;
+        size_t skip = 0; // number of block entries that lie in front of cc_begin
+        if (abs_decode_begin < cc_begin) {
+            skip = cc_begin - abs_decode_begin;
+        }
+        size_t res = abs_decode_begin + skip - cc_begin;
+
+        bool uniform_block = (s_begin+1)*sd < m_psi.size() and skip == 0 and smpl+sd == m_psi.sample(s_begin+1);
+        if (uniform_block) {
+            if (answer_j) {
+                return std::make_tuple(res + (i - smpl), res + (j - smpl)); // NOTE(review): i - smpl wraps if smpl > i -- confirm this cannot occur here
+            } else {
+                return std::make_tuple(res + (i - smpl), rank_comp_bwt(j, cc));
+            }
+        }
+
+        uint64_t* p = m_psi_buf.data();
+        // extract the psi values between two samples
+        m_psi.get_inter_sampled_values(s_begin, p);
+        p = m_psi_buf.data();
+
+        auto it = p + skip;
+        for (; (res < cc_end - cc_begin) and it < m_psi_buf.data()+sd; ++it) { // count entries below i
+            if ((*it)+smpl >= i) {
+                break;
+            }
+            ++res;
+        }
+        if (answer_j) {
+            size_t res2 = res; // continue the scan from where the scan for i stopped
+            for (; (res2 < cc_end - cc_begin) and it < m_psi_buf.data()+sd; ++it) {
+                if ((*it)+smpl >= j) {
+                    break;
+                }
+                ++res2;
+            }
+            return std::make_tuple(res, res2);
+        }
+        return std::make_tuple(res, rank_comp_bwt(j, cc)); // j lies beyond this block: separate query
+    }
 
 private:
-	// Calculates how many symbols c are in the prefix [0..i-1] of the BWT of the original text.
-	/*
-         *  \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
-         *  \param c The symbol to count the occurrences in the prefix.
-         *    \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT.
-         *  \par Time complexity
-         *        \f$ \Order{\log n t_{\Psi}} \f$
-         */
-	size_type rank_bwt(size_type i, const char_type c) const
-	{
-		comp_char_type cc = char2comp[c];
-		if (cc == 0 and c != 0) // character is not in the text => return 0
-			return 0;
-		if (i == 0) return 0;
-		assert(i <= size());
-
-		size_type lower_b, upper_b; // lower_b inclusive, upper_b exclusive
-
-		const size_type sd		 = m_psi.get_sample_dens();
-		size_type		lower_sb = (C[cc] + sd - 1) / sd;	 // lower_sb inclusive
-		size_type		upper_sb = (C[cc + 1] + sd - 1) / sd; // upper_sb exclusive
-		while (lower_sb + 1 < upper_sb) {
-			size_type mid = (lower_sb + upper_sb) / 2;
-			if (m_psi.sample(mid) >= i)
-				upper_sb = mid;
-			else
-				lower_sb = mid;
-		}
-
-		if (lower_sb == upper_sb) { // the interval was smaller than sd
-			lower_b = C[cc];
-			upper_b = C[cc + 1];
-		} else if (lower_sb > (C[cc] + sd - 1) / sd) { // main case
-			// TODO: don't use get_inter_sampled_values if t_dens is really
-			//       large
-			lower_b = lower_sb * sd;
-			if (0 == m_psi_buf.size()) {
-				upper_b = std::min(upper_sb * sd, C[cc + 1]);
-				goto finish;
-			}
-			uint64_t* p = m_psi_buf.data();
-			// extract the psi values between two samples
-			m_psi.get_inter_sampled_values(lower_sb, p);
-			p			  = m_psi_buf.data();
-			uint64_t smpl = m_psi.sample(lower_sb);
-			// handle border cases
-			if (lower_b + m_psi.get_sample_dens() >= C[cc + 1])
-				m_psi_buf[C[cc + 1] - lower_b] = size() - smpl;
-			else
-				m_psi_buf[m_psi.get_sample_dens()] = size() - smpl;
-			// search the result linear
-			while ((*p++) + smpl < i)
-				;
-
-			return p - 1 - m_psi_buf.data() + lower_b - C[cc];
-		} else { // lower_b == (m_C[cc]+sd-1)/sd and lower_sb < upper_sb
-			if (m_psi.sample(lower_sb) >= i) {
-				lower_b = C[cc];
-				upper_b = lower_sb * sd + 1;
-			} else {
-				lower_b = lower_sb * sd;
-				upper_b = std::min(upper_sb * sd, C[cc + 1]);
-			}
-		}
-	finish:
-		// binary search the interval [C[cc]..C[cc+1]-1] for the result
-		//            size_type lower_b = m_C[cc], upper_b = m_C[cc+1]; // lower_b inclusive, upper_b exclusive
-		while (lower_b + 1 < upper_b) {
-			size_type mid = (lower_b + upper_b) / 2;
-			if (m_psi[mid] >= i)
-				upper_b = mid;
-			else
-				lower_b = mid;
-		}
-		if (lower_b > C[cc])
-			return lower_b - C[cc] + 1;
-		else {						   // lower_b == m_C[cc]
-			return m_psi[lower_b] < i; // 1 if m_psi[lower_b]<i, 0 otherwise
-		}
-	}
 
-	// Calculates the position of the i-th c in the BWT of the original text.
-	/*
-         *  \param i The i-th occurrence. \f$i\in [1..rank_bwt(size(),c)]\f$.
-         *  \param c Symbol c.
-         *    \returns The position of the i-th c in the BWT or size() if c does occur less then i times.
-         *  \par Time complexity
-         *        \f$ \Order{t_{\Psi}} \f$
-         */
+    template<typename V>
+    size_t upper_bound(size_t first, size_t last, V value) const
+    {
+        // Binary search over the sample indices [first, last): yields the first index whose
+        // psi sample compares greater than value (last if none); assumes sorted samples.
+        size_t remaining = last - first;
+        while (remaining > 0) {
+            const size_t half  = remaining / 2;
+            const size_t probe = first + half;
+            if (value < m_psi.sample(probe)) {
+                remaining = half;          // keep searching in [first, probe)
+            } else {
+                first = probe + 1;         // sample(probe) is not greater: drop it and the left part
+                remaining -= half + 1;
+            }
+        }
+        return first;
+    }
+
+    // Counts the occurrences of symbol c in the prefix [0..i-1] of the BWT of the original text.
+    /*
+     *  \param i Exclusive end of the prefix [0..i-1], so \f$i\in [0..size()]\f$.
+     *  \param c The symbol to count; it is translated with char2comp before counting.
+     *    \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT.
+     *  \par Time complexity
+     *        \f$ \Order{\log n t_{\Psi}} \f$
+     */
+    // replace const char_type c by const std::array<char_type, alphabet_type::C_depth>& c
+    template<typename t_char>
+    size_type rank_bwt(size_type i, const t_char c)const
+    {
+        auto comp = char2comp[c];
+        // comp==0 for a non-zero c: character is not in the text; i==0: empty prefix => 0
+        if ((comp == 0 and c != 0) or i == 0) {
+            return 0;
+        }
+        return rank_comp_bwt(i, comp);
+    }
+
+    template<typename t_char>
+    std::array<size_type,2> rank_bwt(std::array<size_type,2> ij, const t_char c)const
+    {
+        // Pair variant: applies the scalar rank_bwt to ij[0] and then to ij[1].
+        return std::array<size_type,2>{{rank_bwt(ij[0], c), rank_bwt(ij[1], c)}};
+    }
+
+    // Calculates the position of the i-th c in the BWT of the original text.
+    /*
+     *  \param i The i-th occurrence. \f$i\in [1..rank_bwt(size(),c)]\f$.
+     *  \param c Symbol c.
+     *    \returns The position of the i-th c in the BWT or size() if c occurs fewer than i times.
+     *  \par Time complexity
+     *        \f$ \Order{t_{\Psi}} \f$
+     */
 	size_type select_bwt(size_type i, const char_type c) const
 	{
 		assert(i > 0);
@@ -358,52 +482,81 @@ template <class t_enc_vec,
 csa_sada<t_enc_vec, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_alphabet_strat>::csa_sada(
 cache_config& config)
 {
-	create_buffer();
-	if (!cache_file_exists(key_bwt<alphabet_type::int_width>(), config)) {
-		return;
-	}
-	size_type n = 0;
-	{
-		int_vector_buffer<alphabet_type::int_width> bwt_buf(
-		cache_file_name(key_bwt<alphabet_type::int_width>(), config));
-		n		   = bwt_buf.size();
-		auto event = memory_monitor::event("construct csa-alpbabet");
-		m_alphabet = alphabet_type(bwt_buf, n);
-	}
-	{
-		auto event  = memory_monitor::event("sample SA");
-		m_sa_sample = sa_sample_type(config);
-	}
-	{
-		auto			event = memory_monitor::event("sample ISA");
-		isa_sample_type isa_s(config, &m_sa_sample);
-		util::swap_support(m_isa_sample, isa_s, &m_sa_sample, (const sa_sample_type*)nullptr);
-	}
-	// if ( config.delete_files ) {
-	//     remove_from_cache<int_vector<>>(conf::KEY_SA, config);
-	// }
-
-	int_vector<> cnt_chr(sigma, 0, bits::hi(n) + 1);
-	for (typename alphabet_type::sigma_type i = 0; i < sigma; ++i) {
-		cnt_chr[i] = C[i];
-	}
-	// calculate psi
-	{
-		auto										event = memory_monitor::event("construct PSI");
-		int_vector_buffer<alphabet_type::int_width> bwt_buf(
-		cache_file_name(key_bwt<alphabet_type::int_width>(), config));
-		std::string psi_file = cache_file_name(conf::KEY_PSI, config);
-		auto		psi		 = write_out_mapper<>::create(psi_file, n, bits::hi(n) + 1);
-		for (size_type i = 0; i < n; ++i) {
-			psi[cnt_chr[char2comp[bwt_buf[i]]]++] = i;
-		}
-		register_cache_file(conf::KEY_PSI, config);
-	}
-	{
-		auto				event = memory_monitor::event("encode PSI");
-		int_vector_buffer<> psi_buf(cache_file_name(conf::KEY_PSI, config));
-		m_psi = t_enc_vec(psi_buf);
-	}
+    create_buffer();
+    if (!cache_file_exists(key_bwt<alphabet_type::int_width>(), config)) {
+        return;
+    }
+    int_vector_buffer<alphabet_type::int_width> bwt_buf(cache_file_name(key_bwt<alphabet_type::int_width>(),config));
+    size_type n = bwt_buf.size();
+    {
+        auto event = memory_monitor::event("construct csa-alpbabet");
+//        alphabet_type tmp_alphabet(bwt_buf, n); // TODO: maybe it is possible to use _buf_buf again for multibyte!!
+        int_vector_buffer<alphabet_type::int_width> text_buf(cache_file_name(key_text<alphabet_type::int_width>(),config));
+        m_alphabet = alphabet_type(text_buf, n);
+    }
+
+    int_vector<> cnt_chr(sigma, 0, bits::hi(n)+1);
+    for (typename alphabet_type::sigma_type i=0; i < sigma; ++i) {
+        cnt_chr[i] = C[i];
+    }
+    // calculate psi
+    {
+        auto event = memory_monitor::event("construct PSI");
+        // TODO: move PSI construct into construct_PSI.hpp
+        int_vector<> psi(n, 0, bits::hi(n)+1);
+        for (size_type i=0; i < n; ++i) {
+            psi[ cnt_chr[ char2comp[bwt_buf[i]] ]++ ] = i;
+        }
+        std::string psi_file = cache_file_name(conf::KEY_PSI, config);
+        if (!store_to_cache(psi, conf::KEY_PSI, config)) {
+            return;
+        }
+    }
+    {
+        auto event = memory_monitor::event("encode PSI");
+        int_vector_buffer<> psi_buf(cache_file_name(conf::KEY_PSI, config));
+        m_psi = t_enc_vec(psi_buf);
+        /*
+                enc_vector<coder::elias_delta, enc_vector_type::sample_dens> m_psi_check(psi_buf);
+                if ( m_psi_check.size() != m_psi.size() ){
+                    std::cout<<"m_psi.size()="<<m_psi.size()<<"!="<<m_psi_check.size()<<" m_psi_check.size()"<<std::endl;
+                } else {
+
+                    std::vector<uint64_t> buf1 = std::vector<uint64_t>(enc_vector_type::sample_dens+1);
+                    std::vector<uint64_t> buf2 = std::vector<uint64_t>(enc_vector_type::sample_dens+1);
+
+                    std::cout<<"m_psi.size()="<<m_psi.size()<<std::endl;
+                    for(size_t i=0; i<m_psi.size()/enc_vector_type::sample_dens; ++i){
+                        if ( m_psi.sample(i) != m_psi_check.sample(i) ) {
+                            std::cout<<"m_psi.sample(i) != m_psi_check.sample(i) for i="<<i<<" "<<m_psi.sample(i)<<"!="<<m_psi_check.sample(i)<<std::endl;
+                        }
+                        m_psi.get_inter_sampled_values(i, buf1.data());
+                        m_psi_check.get_inter_sampled_values(i, buf2.data());
+                        bool error = false;
+                        for(size_t j=0; j<enc_vector_type::sample_dens and i*enc_vector_type::sample_dens+j<m_psi.size(); ++j) {
+                            if ( buf1[j] != buf2[j] ) {
+                                std::cout<<"i="<<i<<" j="<<j<<" buf1[j]="<<buf1[j]<<" buf2[j]="<<buf2[j]<<std::endl;
+                                error = true;
+                            }
+                        }
+                        if (error) {
+                            std::cout<<" m_psi.sample(i)="<<m_psi.sample(i)<<std::endl;
+                            std::cout<<" m_psi.sample(i+1)="<<m_psi.sample(i+1)<<" m_psi_check.sample(i+1)="<<m_psi_check.sample(i+1)<<std::endl;
+                            throw std::logic_error("error");
+                        }
+                    }
+                }
+        */
+    }
+    {
+        auto event = memory_monitor::event("sample SA");
+        m_sa_sample = sa_sample_type(config);
+    }
+    {
+        auto event = memory_monitor::event("sample ISA");
+        isa_sample_type isa_s(config, &m_sa_sample);
+        util::swap_support(m_isa_sample, isa_s, &m_sa_sample, (const sa_sample_type*)nullptr);
+    }
 }
 
 template <class t_enc_vec,
diff --git a/include/sdsl/csa_sada2.hpp b/include/sdsl/csa_sada2.hpp
new file mode 100644
index 000000000..3ebc4a128
--- /dev/null
+++ b/include/sdsl/csa_sada2.hpp
@@ -0,0 +1,655 @@
+// Copyright (c) 2017, the SDSL Project Authors.  All rights reserved.
+// Please see the AUTHORS file for details.  Use of this source code is governed
+// by a BSD license that can be found in the LICENSE file.
+/*! \file csa_sada2.hpp
+    \brief csa_sada2.hpp contains an implementation of the compressed suffix array.
+    \author Simon Gog
+*/
+
+#ifndef INCLUDED_SDSL_CSA_SADAII
+#define INCLUDED_SDSL_CSA_SADAII
+
+#include "bit_vectors.hpp"
+#include "int_vector.hpp"
+#include "iterators.hpp"
+#include "suffix_array_helper.hpp"
+#include "util.hpp"
+#include "io.hpp"
+#include "csa_sampling_strategy.hpp"
+#include "csa_alphabet_strategy.hpp"
+#include <iostream>
+#include <algorithm>
+#include <cassert>
+#include <cstring> // for strlen
+#include <iomanip>
+#include <iterator>
+
+
+
+namespace sdsl
+{
+
+//! Psi-function support for csa_sada2 that stores one increasing sequence per character context.
+/*! Contexts with fewer than t_hyb_vec::block_size entries are kept as plain integers,
+ *  grouped by context size; every other context is stored in its own t_hyb_vec.
+ */
+template<typename t_hyb_vec,
+         typename t_csa
+         >
+class uef_psi_support
+{
+    public:
+        typedef typename bit_vector::size_type                size_type;
+        typedef size_type                                     value_type;
+        typedef typename t_csa::alphabet_type                 alphabet_type;
+        typedef typename alphabet_type::comp_char_type        comp_char_type;
+        typedef typename alphabet_type::C_type                C_type;
+        typedef random_access_const_iterator<uef_psi_support> iterator;
+        typedef iterator                                      const_iterator;
+        typedef const value_type                              reference;
+        typedef const value_type                              const_reference;
+        typedef const value_type*                             const_pointer;
+        typedef ptrdiff_t                                     difference_type;
+        typedef csa_member_tag                                category;
+        typedef int_alphabet_tag                              alphabet_category;
+        typedef wt_huff_int<bit_vector,
+                rank_support_v<>,
+                select_support_scan<1>,
+                select_support_scan<0>>                sml_wt_type;
+
+    private:
+        std::vector<t_hyb_vec>                         m_inc_seq;      // Psi values of the large contexts, one vector each
+        std::vector<typename t_hyb_vec::rank_1_type>   m_inc_seq_rank; // rank support for m_inc_seq[k]
+        std::vector<typename t_hyb_vec::select_1_type> m_inc_seq_sel;  // select support for m_inc_seq[k]
+        bit_vector                                     m_sml;         // m_sml[cc]==1 iff context cc is small
+        rank_support_v5<>                              m_sml_rank;    // rank for m_sml
+        sml_wt_type                                    m_sml_wt;      // WT over the sizes of the small contexts
+        std::vector<int_vector<>>                      m_sml_inc_seq; // [s]: Psi values of all small contexts of size s
+
+        const t_csa*                                   m_csa;          // non-owning; provides C and size()
+
+        // Points every rank/select support at the hybrid vector stored at the same index.
+        void set_inc_seq_rank_select()
+        {
+            for (size_t i=0; i<m_inc_seq_rank.size(); ++i) {
+                m_inc_seq_rank[i].set_vector(&(m_inc_seq[i]));
+                m_inc_seq_sel[i].set_vector(&(m_inc_seq[i]));
+            }
+        }
+    public:
+
+        //! Creates a support without Psi data that refers to the given CSA (may be nullptr).
+        uef_psi_support(const t_csa* csa=nullptr)
+        {
+            set_vector(csa);
+        }
+
+        //! Builds the Psi representation from the Psi values in psi_buf.
+        /*! \param csa CSA whose C array delimits the contexts; it is dereferenced here and must not be nullptr. */
+        uef_psi_support(int_vector_buffer<>& psi_buf, const t_csa* csa)
+        {
+            set_vector(csa);
+            const auto& C = m_csa->C;
+
+            m_sml = bit_vector(C.size()-1,0);
+            const auto threshold = t_hyb_vec::block_size;
+// (1)      Determine the number of small blocks
+            for (size_t i=0; i<C.size()-1; ++i) {
+                m_sml[i] = (C[i+1]-C[i]) < threshold;
+            }
+            m_sml_rank = decltype(m_sml_rank)(&m_sml);
+            size_t sigma_small = m_sml_rank(C.size()-1);
+            size_t sigma_large = C.size()-1-sigma_small;
+            {
+                int_vector<> sml(sigma_small, 0, bits::hi(threshold)+1);
+
+// (2)          Create a vector containing only the small context sizes
+                for (size_t i=0, ii=0; i<C.size()-1; ++i) {
+                    if (m_sml[i] == 1) {
+                        sml[ii++] = C[i+1]-C[i];
+                    }
+                }
+// (3)          Create WT over sml
+                construct_im(m_sml_wt, sml, 0);
+            }
+// (4)      Initialize m_sml_inc_seq
+            m_sml_inc_seq.resize(threshold);
+            for (uint64_t cs=1; cs<threshold; ++cs) {
+                auto size = cs * m_sml_wt.rank(m_sml_wt.size(), cs); // cs entries per small context of size cs
+                m_sml_inc_seq[cs] = int_vector<>(size, 0, bits::hi(m_csa->size())+1);
+            }
+
+// (5)      Initialize m_inc_seq (to store the larger contexts)
+            m_inc_seq.resize(sigma_large);
+            m_inc_seq_rank.resize(sigma_large);
+            m_inc_seq_sel.resize(sigma_large);
+            for (size_t i=0,i0=0,i1=0; i<C.size()-1; ++i) { // i0: next large slot, i1: small contexts seen so far
+                int_vector<> v(C[i+1]-C[i]);
+                for (size_t j=C[i]; j<C[i+1]; ++j) {
+                    v[j-C[i]] = psi_buf[j];
+                }
+                if (m_sml[i]) {
+                    auto rank = m_sml_wt.rank(i1++, v.size()); // earlier small contexts of the same size
+                    auto start_pos = rank * v.size();
+                    for (size_t j=0; j<v.size(); ++j) {
+                        m_sml_inc_seq[v.size()][j+start_pos] = v[j];
+                    }
+                } else {
+                    m_inc_seq[i0++] = t_hyb_vec(v.begin(), v.end());
+                }
+            }
+            set_inc_seq_rank_select();
+        }
+
+        //! Copy assignment; the copied rank/select supports are re-bound to this object's vectors.
+        uef_psi_support& operator=(const uef_psi_support& psi)
+        {
+            if (this != &psi) {
+                m_inc_seq      = psi.m_inc_seq;
+                m_inc_seq_rank = psi.m_inc_seq_rank;
+                m_inc_seq_sel  = psi.m_inc_seq_sel;
+                m_sml          = psi.m_sml;
+                m_sml_rank     = psi.m_sml_rank;
+                m_sml_rank.set_vector(&m_sml);
+                m_sml_wt       = psi.m_sml_wt;
+                m_sml_inc_seq  = psi.m_sml_inc_seq;
+                set_inc_seq_rank_select();
+                set_vector(psi.m_csa);
+            }
+            return *this;
+        }
+
+        //! Move assignment; the moved rank/select supports are re-bound to this object's vectors.
+        uef_psi_support& operator=(uef_psi_support&& psi)
+        {
+            if (this != &psi) {
+                set_vector(psi.m_csa);
+                m_inc_seq      = std::move(psi.m_inc_seq);
+                m_inc_seq_rank = std::move(psi.m_inc_seq_rank);
+                m_inc_seq_sel  = std::move(psi.m_inc_seq_sel);
+                m_sml          = std::move(psi.m_sml);
+                m_sml_rank     = std::move(psi.m_sml_rank);
+                m_sml_rank.set_vector(&m_sml);
+                m_sml_wt       = std::move(psi.m_sml_wt);
+                m_sml_inc_seq  = std::move(psi.m_sml_inc_seq);
+                set_inc_seq_rank_select();
+            }
+            return *this;
+        }
+
+        //! Sets the CSA whose C array and size() are used by the queries.
+        void set_vector(const t_csa* csa)
+        {
+            m_csa = csa;
+        }
+
+        //! Rank query for context cc; for a small context: number of stored Psi values smaller than i.
+        /*! Large contexts forward i to the rank support of their t_hyb_vec (presumably the same count). */
+        uint64_t rank(uint64_t i, comp_char_type cc) const
+        {
+            if (m_sml[cc]) {
+                auto cc_sml  = m_sml_rank(cc);
+                size_type cs = m_csa->C[cc+1] - m_csa->C[cc]; // context size
+                auto rank = m_sml_wt.rank(cc_sml, cs);
+                size_type begin = rank*cs;
+                for (size_t j=0; j<cs; ++j) {
+                    if (m_sml_inc_seq[cs][begin+j] >= i)
+                        return j;
+                }
+                return cs;
+            } else {
+                size_type cc_large  = cc - m_sml_rank(cc);
+                return m_inc_seq_rank[cc_large](i);
+            }
+        }
+
+        //! Answers the rank queries for ij[0] and ij[1] in context cc with one call.
+        /*! For small contexts the scan for ij[1] resumes where the scan for ij[0] stopped,
+         *  so the result only matches two single queries if ij[0] <= ij[1] (TODO confirm callers). */
+        std::array<uint64_t,2> rank(std::array<uint64_t,2> ij, comp_char_type cc) const
+        {
+            if (m_sml[cc]) {
+                auto cc_sml  = m_sml_rank(cc);
+                size_type cs = m_csa->C[cc+1] - m_csa->C[cc]; // context size
+                auto rnk = m_sml_wt.rank(cc_sml, cs);
+                size_type begin = rnk*cs;
+                std::array<uint64_t,2> res = {{0,0}};
+                size_t j=0;
+                for (size_t k=0; k<2; ++k) {
+                    while (j < cs and  m_sml_inc_seq[cs][begin+j] < ij[k]) {
+                        ++j;
+                    }
+                    res[k] = j;
+                }
+                return res;
+            } else {
+                size_type cc_large  = cc - m_sml_rank(cc);
+                auto res = m_inc_seq_rank[cc_large](ij);
+                return res;
+            }
+        }
+
+        //! Returns the i-th (1-based) Psi value stored for context cc.
+        /*! Small contexts read the stored integer directly; large contexts use the select support. */
+        uint64_t select(uint64_t i, comp_char_type cc) const
+        {
+            if (m_sml[cc]) {
+                auto cc_sml  = m_sml_rank(cc);
+                size_type cs = m_csa->C[cc+1] - m_csa->C[cc]; // context size
+                auto rank = m_sml_wt.rank(cc_sml, cs);
+                return m_sml_inc_seq[cs][rank*cs+(i-1)];
+            } else {
+                size_type cc_large  = cc - m_sml_rank(cc);
+                return m_inc_seq_sel[cc_large](i);
+            }
+        }
+
+        //! Returns the Psi value at position i.
+        /*! The context of i is found by a binary search in C; i-C[cc] is the offset inside that context. */
+        value_type operator[](const size_type i) const
+        {
+            size_t cc = std::upper_bound(m_csa->C.begin(), m_csa->C.end(),i) - m_csa->C.begin() - 1;
+            size_t cum_sum = m_csa->C[cc];
+            if (m_sml[cc]) {
+                auto cc_sml  = m_sml_rank(cc);
+                size_type cs = m_csa->C[cc+1] - cum_sum; // context size
+                auto rank = m_sml_wt.rank(cc_sml, cs);
+                return m_sml_inc_seq[cs][rank*cs+(i-cum_sum)];
+            } else {
+                size_type cc_large  = cc - m_sml_rank(cc);
+                return m_inc_seq_sel[cc_large](i-cum_sum+1); // select is 1-based
+            }
+        }
+
+        //! Number of Psi values; forwarded from the referenced CSA (which must be set).
+        size_type size() const
+        {
+            return m_csa->size();
+        }
+
+        //! Serializes the data structure into the given ostream
+        size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const
+        {
+            structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
+            size_type written_bytes = 0;
+            written_bytes += sdsl::serialize(m_inc_seq, out, child, "inc_seq");
+            written_bytes += sdsl::serialize(m_inc_seq_rank, out, child, "inc_seq_rank");
+            written_bytes += sdsl::serialize(m_inc_seq_sel, out, child, "inc_seq_sel"); // own label, not a second "inc_seq_rank"
+            written_bytes += sdsl::serialize(m_sml, out, child, "sml");
+            written_bytes += sdsl::serialize(m_sml_rank, out, child, "sml_rank");
+            written_bytes += sdsl::serialize(m_sml_wt, out, child, "sml_wt");
+            written_bytes += sdsl::serialize(m_sml_inc_seq, out, child, "sml_inc_seq");
+            structure_tree::add_size(child, written_bytes);
+            return written_bytes;
+        }
+
+        //! Loads the data structure from the given istream.
+        void load(std::istream& in, const t_csa* csa = nullptr)
+        {
+            sdsl::load(m_inc_seq, in);
+            sdsl::load(m_inc_seq_rank, in);
+            sdsl::load(m_inc_seq_sel, in);
+            sdsl::load(m_sml, in);
+            sdsl::load(m_sml_rank, in);
+            m_sml_rank.set_vector(&m_sml);   // re-bind the loaded support to the loaded vector
+            sdsl::load(m_sml_wt, in);
+            sdsl::load(m_sml_inc_seq, in);
+            set_inc_seq_rank_select();       // same re-binding for the per-context supports
+            set_vector(csa);
+        }
+
+        //! Returns an iterator that refers to position 0 of the Psi function.
+        const const_iterator begin()const
+        {
+            return const_iterator(this, 0);
+        }
+
+        //! Returns an iterator that refers to position size(), i.e. one past the last value.
+        const const_iterator end()const
+        {
+            return const_iterator(this, size());
+        }
+
+};
+
+//! A class for the Compressed Suffix Array (CSA) proposed by Sadakane for practical implementation.
+/*!
+  *  \tparam t_hyb_sd            Vector type used to store the Psi-function.
+  *  \tparam t_dens              Sampling density of SA values
+  *  \tparam t_inv_dens          Sampling density of ISA values
+  *  \tparam t_sa_sample_strat   Policy of SA sampling. E.g. sample in SA-order or text-order.
+  *  \tparam t_isa_sample_strat  Policy of ISA sampling.
+  *  \tparam t_alphabet_strat    Policy for alphabet representation.
+  *
+  *  \sa sdsl::csa_wt, sdsl::csa_bitcompressed
+  * @ingroup csa
+ */
+template<class t_hyb_sd          = hyb_sd_vector<>,       // Vector type used to store the Psi-function
+         uint32_t t_dens         = 32,                    // Sample density for suffix array (SA) values
+         uint32_t t_inv_dens     = 64,                    // Sample density for inverse suffix array (ISA) values
+         class t_sa_sample_strat = sa_order_sa_sampling<>,// Policy class for the SA sampling.
+         class t_isa_sample_strat= isa_sampling<>,        // Policy class for ISA sampling.
+         class t_alphabet_strat  = byte_alphabet          // Policy class for the representation of the alphabet.
+         >
+class csa_sada2
+{
+        static_assert(t_dens > 0,
+                      "Second template argument has to be greater then 0.");
+        static_assert(t_inv_dens > 0,
+                      "Third template argument has to be greater then 0.");
+        static_assert(std::is_same<typename sampling_tag<t_sa_sample_strat>::type, sa_sampling_tag>::value,
+                      "Forth template argument has to be a suffix array sampling strategy.");
+        static_assert(std::is_same<typename sampling_tag<t_isa_sample_strat>::type, isa_sampling_tag>::value,
+                      "Fifth template argument has to be a inverse suffix array sampling strategy.");
+        static_assert(is_alphabet<t_alphabet_strat>::value,
+                      "Sixth template argument has to be a alphabet strategy.");
+
+        friend class bwt_of_csa_psi<csa_sada2>;
+    public:
+        enum { sa_sample_dens = t_dens,
+               isa_sample_dens = t_inv_dens
+             };
+
+        typedef uint64_t                                             value_type;
+        typedef random_access_const_iterator<csa_sada2>               const_iterator;
+        typedef const_iterator                                       iterator;
+        typedef const value_type                                     const_reference;
+        typedef const_reference                                      reference;
+        typedef const_reference*                                     pointer;
+        typedef const pointer                                        const_pointer;
+        typedef int_vector<>::size_type                              size_type;
+        typedef size_type                                            csa_size_type;
+        typedef ptrdiff_t                                            difference_type;
+        typedef traverse_csa_psi<csa_sada2,false>                     lf_type;
+        typedef bwt_of_csa_psi<csa_sada2>                             bwt_type;
+        typedef isa_of_csa_psi<csa_sada2>                             isa_type;
+        typedef text_of_csa<csa_sada2>                                text_type;
+        typedef first_row_of_csa<csa_sada2>                           first_row_type;
+        typedef typename t_sa_sample_strat::template type<csa_sada2>  sa_sample_type;
+        typedef typename t_isa_sample_strat::template type<csa_sada2> isa_sample_type;
+        typedef t_alphabet_strat                                     alphabet_type;
+        typedef typename alphabet_type::alphabet_category            alphabet_category;
+        typedef typename alphabet_type::comp_char_type               comp_char_type;
+        typedef typename alphabet_type::char_type                    char_type; // Note: This is the char type of the CSA not the WT!
+        typedef typename alphabet_type::string_type                  string_type;
+        typedef csa_sada2                                            csa_type;
+
+        typedef csa_tag                                              index_category;
+        typedef psi_tag                                              extract_category;
+        typedef uef_psi_support<t_hyb_sd, csa_sada2>                 psi_type;
+
+        friend class traverse_csa_psi<csa_sada2,true>;
+        friend class traverse_csa_psi<csa_sada2,false>;
+
+    private:
+        alphabet_type   m_alphabet;    // alphabet component
+        psi_type        m_psi_support; // psi function
+        sa_sample_type  m_sa_sample;   // suffix array samples
+        isa_sample_type m_isa_sample;  // inverse suffix array samples
+
+    public:
+        const typename alphabet_type::char2comp_type& char2comp  = m_alphabet.char2comp;
+        const typename alphabet_type::comp2char_type& comp2char  = m_alphabet.comp2char;
+        const typename alphabet_type::C_type&         C          = m_alphabet.C;
+        const typename alphabet_type::sigma_type&     sigma      = m_alphabet.sigma;
+        const alphabet_type&                          alphabet   = m_alphabet;
+        const psi_type&                               psi        = m_psi_support;
+        const lf_type                                 lf         = lf_type(*this);
+        const bwt_type                                bwt        = bwt_type(*this);
+        const isa_type                                isa        = isa_type(*this);
+        const bwt_type                                L          = bwt_type(*this);
+        const first_row_type                          F          = first_row_type(*this);
+        const text_type                               text       = text_type(*this);
+        const sa_sample_type&                         sa_sample  = m_sa_sample;
+        const isa_sample_type&                        isa_sample = m_isa_sample;
+
+
+        //! Default constructor: binds the Psi support to this object.
+        csa_sada2() { m_psi_support.set_vector(this); }
+        //! Destructor
+        ~csa_sada2() { }
+
+        //! Copy constructor (delegates to the copy assignment operator)
+        csa_sada2(const csa_sada2& csa)
+        {
+            *this = csa;
+        }
+
+        //! Move constructor (delegates to the move assignment operator)
+        csa_sada2(csa_sada2&& csa)
+        {
+            *this = std::move(csa);
+        }
+
+        csa_sada2(cache_config& config);
+
+        //! Number of elements in the \f$\CSA\f$.
+        /*! Required for the Container Concept of the STL.
+         *  \sa max_size, empty
+         *  \par Time complexity  \f$ \Order{1} \f$
+         */
+        size_type size()const
+        {
+            if (C.size() == 0) return 0;   // no C array => no elements
+            return C[C.size()-1];          // otherwise the last entry of C
+        }
+
+        //! Returns the largest size that csa_sada2 can ever have.
+        /*! Required for the Container Concept of the STL.
+         *  \sa size
+         */
+        static size_type max_size()
+        {
+            return int_vector<>::max_size(); // the limit is taken over from int_vector<>
+        }
+
+        //! Returns whether the data structure is empty, i.e. whether size() is 0.
+        /*! Required for the Container Concept of the STL.
+         * \sa size
+         */
+        bool empty()const
+        {
+            return size() == 0;
+        }
+
+        //! Returns a const_iterator that refers to position 0 of this CSA.
+        /*! Required for the STL Container Concept. \sa end
+         */
+        const_iterator begin()const
+        {
+            const_iterator first(this, 0);
+            return first;
+        }
+
+        //! Returns a const_iterator that refers to position size(), one past the last element.
+        /*! Required for the STL Container Concept. \sa begin
+         */
+        const_iterator end()const
+        {
+            const_iterator past_last(this, size());
+            return past_last;
+        }
+
+        //! []-operator: returns the suffix array value at position i.
+        /*! \param i Index of the value. \f$ i \in [0..size()-1]\f$.
+         * Required for the STL Random Access Container Concept.
+         * \par Time complexity
+         *      \f$ \Order{s_{SA}\cdot t_{\Psi}} \f$, where every \f$s_{SA}\f$th suffix array entry is sampled and \f$t_{\Psi}\f$
+         *           is the access time for an element in the \f$\Psi\f$-function.
+         */
+        value_type operator[](size_type i)const
+        {
+            // Follow Psi until a sampled position is reached. Every step moves to the
+            // position where SA[i]+1 is located, so the number of steps has to be
+            // subtracted from the sampled value afterwards.
+            size_type steps = 0;
+            for (; !m_sa_sample.is_sampled(i); ++steps) {
+                i = psi[i];
+            }
+            const value_type sampled = m_sa_sample[i];
+            if (sampled >= steps) {
+                // plain case: the subtraction does not underflow
+                return sampled-steps;
+            }
+            // sampled < steps: the difference would underflow, so the result is
+            // expressed relative to psi.size() instead (wrap-around).
+            return psi.size()-(steps-sampled);
+        }
+
+
+        //! Assignment Copy Operator.
+        /*! Copies every member of csa and then re-binds the supports via set_vector().
+         *    Required for the Assignable Concept of the STL.
+         */
+        csa_sada2& operator=(const csa_sada2& csa)
+        {
+            if (this != &csa) {                         // self-assignment: nothing to do
+                m_alphabet   = csa.m_alphabet;
+                m_psi_support = csa.m_psi_support;
+                m_psi_support.set_vector(this);         // the copied support must refer to this object, not to csa
+                m_sa_sample  = csa.m_sa_sample;
+                m_isa_sample = csa.m_isa_sample;
+                m_isa_sample.set_vector(&m_sa_sample);  // bind the ISA samples to our own SA samples
+            }
+            return *this;
+        }
+
+        //! Assignment Move Operator.
+        /*! Moves every member out of csa and then re-binds the supports via set_vector().
+         *    Required for the Assignable Concept of the STL.
+         */
+        csa_sada2& operator=(csa_sada2&& csa)
+        {
+            if (this != &csa) {                         // self-move: nothing to do
+                m_alphabet    = std::move(csa.m_alphabet);
+                m_psi_support = std::move(csa.m_psi_support);
+                m_psi_support.set_vector(this);         // the moved support must refer to this object, not to csa
+                m_sa_sample   = std::move(csa.m_sa_sample);
+                m_isa_sample  = std::move(csa.m_isa_sample);
+                m_isa_sample.set_vector(&m_sa_sample);  // bind the ISA samples to our own SA samples
+            }
+            return *this;
+        }
+
+        //! Serialize to a stream; members are written in the order alphabet, psi, SA samples, ISA samples.
+        /*! \param out Outstream to write the data structure. \param v Parent structure-tree node. \param name Name of the child node added for this object.
+         *  \return The number of written bytes.
+         */
+        size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const
+        {
+            structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
+            size_type written_bytes = 0;
+            written_bytes += m_alphabet.serialize(out, child, "alphabet");
+            written_bytes += m_psi_support.serialize(out, child, "psi");
+            written_bytes += m_sa_sample.serialize(out, child, "sa_samples");
+            written_bytes += m_isa_sample.serialize(out, child, "isa_samples");
+            structure_tree::add_size(child, written_bytes);  // record the accumulated size on the child node
+            return written_bytes;
+        }
+
+        //! Load from a stream; reads the members in the same order in which serialize() writes them.
+        /*! \param in Input stream to load the data structure from.
+         */
+        void load(std::istream& in)
+        {
+            m_alphabet.load(in);
+            m_psi_support.load(in);
+            m_psi_support.set_vector(this);         // bind the freshly loaded support to this object
+            m_sa_sample.load(in);
+            m_isa_sample.load(in);
+            m_isa_sample.set_vector(&m_sa_sample);  // bind the ISA samples to the loaded SA samples
+        }
+
+    private:
+
+        // Calculates how many symbols c are in the prefix [0..i-1] of the BWT of the original text.
+        /*
+         *  \tparam t_pos Type of index. Should either be an unsigned integer or a std::array<,2> of unsigned integers.
+         *  \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
+         *  \param c The symbol to count in the prefix.
+         *    \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT.
+         *  \par Time complexity
+         *        \f$ \Order{\log n t_{\Psi}} \f$
+         */
+        // replace const char_type c by const std::array<char_type, alphabet_type::C_depth>& c
+        template<typename t_pos , typename t_char>
+        t_pos rank_bwt(t_pos i, const t_char c)const
+        {
+            auto cc = char2comp[c];
+            if (cc==0 and c!=0) // character is not in the text => return 0
+                return t_pos {0};
+            if (i == t_pos {0}) // empty prefix => nothing to count
+                return t_pos {0};
+            return m_psi_support.rank(i, cc);  // the actual counting is delegated to the psi support
+        }
+
+
+        // Calculates the position of the i-th c in the BWT of the original text.
+        /*
+         *  \param i The i-th occurrence. \f$i\in [1..rank_bwt(size(),c)]\f$.
+         *  \param c Symbol c.
+         *    \returns The position of the i-th c in the BWT, or size() if c does not occur in the text.
+         *  \par Time complexity
+         *        \f$ \Order{t_{\Psi}} \f$
+         */
+        size_type select_bwt(size_type i, const char_type c)const
+        {
+            assert(i > 0);
+            comp_char_type cc = char2comp[c];
+            if (cc==0 and c!=0)  // character is not in the text => return size()
+                return size();
+            return m_psi_support.select(i, cc);  // the actual selection is delegated to the psi support
+        }
+};
+
+// == template functions ==
+//! Builds the CSA from the cached BWT and text files of config; returns early if the BWT file is missing or PSI cannot be stored.
+template<class t_enc_vec, uint32_t t_dens, uint32_t t_inv_dens, class t_sa_sample_strat, class t_isa, class t_alphabet_strat>
+csa_sada2<t_enc_vec, t_dens, t_inv_dens, t_sa_sample_strat, t_isa, t_alphabet_strat>::csa_sada2(cache_config& config)
+{
+    if (!cache_file_exists(key_bwt<alphabet_type::int_width>(), config)) {
+        return;
+    }
+    int_vector_buffer<alphabet_type::int_width> bwt_buf(cache_file_name(key_bwt<alphabet_type::int_width>(),config));
+    size_type n = bwt_buf.size();
+    {
+        auto event = memory_monitor::event("construct csa-alpbabet");
+        // TODO: maybe the alphabet can be built from bwt_buf again (also for multibyte) instead of from the text
+        int_vector_buffer<alphabet_type::int_width> text_buf(cache_file_name(key_text<alphabet_type::int_width>(),config));
+        m_alphabet = alphabet_type(text_buf, n);
+    }
+    // cnt_chr[c] starts at C[c] and is advanced while PSI is filled below
+    int_vector<> cnt_chr(sigma, 0, bits::hi(n)+1);
+    for (typename alphabet_type::sigma_type i=0; i < sigma; ++i) {
+        cnt_chr[i] = C[i];
+    }
+    // calculate psi
+    {
+        auto event = memory_monitor::event("construct PSI");
+        int_vector<> psi(n, 0, bits::hi(n)+1);
+        for (size_type i=0; i < n; ++i) {
+            psi[ cnt_chr[ char2comp[bwt_buf[i]] ]++ ] = i;   // BWT position i goes to the next free slot of its symbol
+        }
+        // PSI is handed over through the cache: stored here, re-opened as a buffer for encoding below
+        if (!store_to_cache(psi, conf::KEY_PSI, config)) {
+            return;
+        }
+    }
+    {
+        auto event = memory_monitor::event("encode PSI");
+        int_vector_buffer<> psi_buf(cache_file_name(conf::KEY_PSI, config));
+        m_psi_support = psi_type(psi_buf, this);
+    }
+    {
+        auto event = memory_monitor::event("sample SA");
+        m_sa_sample = sa_sample_type(config);
+    }
+    {
+        auto event = memory_monitor::event("sample ISA");
+        isa_sample_type isa_s(config, &m_sa_sample);
+        util::swap_support(m_isa_sample, isa_s, &m_sa_sample, (const sa_sample_type*)nullptr);
+    }
+}
+
+} // end namespace sdsl
+#endif
diff --git a/include/sdsl/csa_wt.hpp b/include/sdsl/csa_wt.hpp
index 4a816bc6d..49a23ccef 100644
--- a/include/sdsl/csa_wt.hpp
+++ b/include/sdsl/csa_wt.hpp
@@ -238,22 +238,32 @@ class csa_wt {
 private:
 	// Calculates how many symbols c are in the prefix [0..i-1] of the BWT of the original text.
 	/*
-         *  \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
-         *  \param c The symbol to count the occurrences in the prefix.
-         *    \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT.
-         *  \par Time complexity
-         *        \f$ \Order{\log |\Sigma|} \f$
-         */
-	size_type rank_bwt(size_type i, const char_type c) const { return m_wavelet_tree.rank(i, c); }
-
-	// Calculates the position of the i-th c in the BWT of the original text.
-	/*
-         *  \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$.
-         *  \param c Symbol c.
-         *    \returns The position of the i-th c in the BWT or size() if c does occur less then i times.
-         *  \par Time complexity
-         *        \f$ \Order{t_{\Psi}} \f$
-         */
+     *  \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
+     *  \param c The symbol to count the occurrences in the prefix.
+     *    \returns The number of occurrences of symbol c in the prefix [0..i-1] of the BWT.
+     *  \par Time complexity
+     *        \f$ \Order{\log |\Sigma|} \f$
+     */
+    size_type
+    rank_bwt(size_type i, const char_type c)const
+    {
+        return m_wavelet_tree.rank(i, c);
+    }
+
+    std::array<size_type,2>
+    rank_bwt(std::array<size_type,2> ij, const char_type c)const
+    {
+        return {rank_bwt(ij[0], c), rank_bwt(ij[1],c)};
+    }
+
+    // Calculates the position of the i-th c in the BWT of the original text.
+    /*
+     *  \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$.
+     *  \param c Symbol c.
+     *    \returns The position of the i-th c in the BWT or size() if c does occur less then i times.
+     *  \par Time complexity
+     *        \f$ \Order{t_{\Psi}} \f$
+     */
 	size_type select_bwt(size_type i, const char_type c) const
 	{
 		assert(i > 0);
diff --git a/include/sdsl/enc_vector.hpp b/include/sdsl/enc_vector.hpp
index 6f8db430d..cc6c4eae1 100644
--- a/include/sdsl/enc_vector.hpp
+++ b/include/sdsl/enc_vector.hpp
@@ -258,56 +258,56 @@ template <class t_coder, uint32_t t_dens, uint8_t t_width>
 template <uint8_t int_width>
 enc_vector<t_coder, t_dens, t_width>::enc_vector(int_vector_buffer<int_width>& v_buf)
 {
-	// clear bit_vectors
-	clear();
-	size_type n = v_buf.size();
-	if (n == 0) // if c is empty there is nothing to do...
-		return;
-	value_type		v1 = 0, v2 = 0, max_sample_value = 0;
-	size_type		samples = 0, z_size = 0;
-	const size_type sd = get_sample_dens();
-	//  (1) Calculate maximal value of samples and of deltas
-	for (size_type i = 0, no_sample = 0; i < n; ++i, --no_sample) {
-		v2 = v_buf[i];
-		if (!no_sample) { // is sample
-			no_sample									= sd;
-			if (max_sample_value < v2) max_sample_value = v2;
-			++samples;
-		} else {
-			z_size += t_coder::encoding_length(v2 - v1);
-		}
-		v1 = v2;
-	}
-
-	//    (2) Write sample values and deltas
-	//    (a) Initialize array for sample values and pointers
-	if (max_sample_value > z_size + 1)
-		m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1);
-	else
-		m_sample_vals_and_pointer.width(bits::hi(z_size + 1) + 1);
-	m_sample_vals_and_pointer.resize(2 * samples + 2); // add 2 for last entry
-	util::set_to_value(m_sample_vals_and_pointer, 0);
-
-	//    (b) Initilize bit_vector for encoded data
-	m_z				 = int_vector<>(z_size, 0, 1);
-	uint64_t* z_data = t_coder::raw_data(m_z);
-	uint8_t   offset = 0;
-
-	//    (c) Write sample values and deltas
-	z_size = 0;
-	for (size_type i = 0, j = 0, no_sample = 0; i < n; ++i, --no_sample) {
-		v2 = v_buf[i];
-		if (!no_sample) { // is sample
-			no_sample					   = sd;
-			m_sample_vals_and_pointer[j++] = v2;	 // write samples
-			m_sample_vals_and_pointer[j++] = z_size; // write pointers
-		} else {
-			z_size += t_coder::encoding_length(v2 - v1);
-			t_coder::encode(v2 - v1, z_data, offset); // write encoded values
-		}
-		v1 = v2;
-	}
-	m_size = n;
+    // clear bit_vectors
+    clear();
+    size_type n = v_buf.size();
+    if (n == 0)  // if c is empty there is nothing to do...
+        return;
+    value_type     v1=0, v2=0, max_sample_value=0;
+    size_type samples=0, z_size=0;
+    const size_type sd = get_sample_dens();
+//  (1) Calculate maximal value of samples and of deltas
+    for (size_type i=0, no_sample = 0; i < n; ++i, --no_sample) {
+        v2 = v_buf[i];
+        if (!no_sample) { // is sample
+            no_sample = sd;
+            if (max_sample_value < v2) max_sample_value = v2;
+            ++samples;
+        } else {
+            z_size += t_coder::encoding_length(v2-v1);
+        }
+        v1 = v2;
+    }
+
+//    (2) Write sample values and deltas
+//    (a) Initialize array for sample values and pointers
+    if (max_sample_value > z_size+1)
+        m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1);
+    else
+        m_sample_vals_and_pointer.width(bits::hi(z_size+1) + 1);
+    m_sample_vals_and_pointer.resize(2*samples+2); // add 2 for last entry
+    util::set_to_value(m_sample_vals_and_pointer, 0);
+
+//    (b) Initilize bit_vector for encoded data
+    m_z = int_vector<>(z_size, 0, 1);
+    uint64_t* z_data = t_coder::raw_data(m_z);
+    uint8_t offset = 0;
+
+//    (c) Write sample values and deltas
+    z_size = 0;
+    for (size_type i=0, j=0, no_sample = 0; i < n; ++i, --no_sample) {
+        v2 = v_buf[i];
+        if (!no_sample) { // is sample
+            no_sample = sd;
+            m_sample_vals_and_pointer[j++] = v2;    // write samples
+            m_sample_vals_and_pointer[j++] = z_size;// write pointers
+        } else {
+            z_size += t_coder::encoding_length(v2-v1);
+            t_coder::encode(v2-v1, z_data, offset);   // write encoded values
+        }
+        v1 = v2;
+    }
+    m_size = n;
 }
 
 template <class t_coder, uint32_t t_dens, uint8_t t_width>
diff --git a/include/sdsl/enc_vector2.hpp b/include/sdsl/enc_vector2.hpp
new file mode 100644
index 000000000..89a08acb4
--- /dev/null
+++ b/include/sdsl/enc_vector2.hpp
@@ -0,0 +1,316 @@
+// Copyright (c) 2016, the SDSL Project Authors.  All rights reserved.
+// Please see the AUTHORS file for details.  Use of this source code is governed
+// by a BSD license that can be found in the LICENSE file.
+/*! \file enc_vector2.hpp
+   \brief enc_vector2.hpp contains the sdsl::enc_vector2 class.
+   \author Simon Gog
+*/
+#ifndef SDSL_ENC_VECTORII
+#define SDSL_ENC_VECTORII
+
+#include "int_vector.hpp"
+#include "coder.hpp"
+#include "iterators.hpp"
+#include "sd_vector.hpp" // sd_vector<>, sd_vector_builder
+#include <utility>       // std::move
+//! Namespace for the succinct data structure library.
+namespace sdsl
+{
+
+template<uint8_t t_width>
+struct enc_vector2_trait {
+    using int_vector_type = int_vector<0>;   // generic case: sample vector with run-time width
+};
+
+template<>
+struct enc_vector2_trait<32> {
+    using int_vector_type = int_vector<32>;  // fixed 32-bit sample vector
+};
+
+template<>
+struct enc_vector2_trait<64> {
+    using int_vector_type = int_vector<64>;  // fixed 64-bit sample vector
+};
+
+//! A generic immutable space-saving vector class for unsigned integers.
+/*! A vector v is stored more space-efficiently by self-delimiting coding
+ *  the deltas v[i+1]-v[i] (v[-1]:=0). Space of the structure and random
+ *  access time to it can be controlled by a sampling parameter t_dens.
+ *
+ *  \tparam t_coder  Self-delimiting coder.
+ *  \tparam t_dens   Every t_dens-th element of v is sampled.
+ *  \tparam t_width  Width of the int_vector used to store the samples.
+ *  This class is a parameter of csa_sada.
+ * @ingroup int_vector
+ */
+template<class t_coder=coder::elias_delta, uint32_t t_dens = 128, uint8_t t_width=0>
+class enc_vector2
+{
+        static_assert(t_dens > 1 , "enc_vector2: sample density must be larger than `1`");
+    public:
+        typedef uint64_t                                 value_type;
+        typedef random_access_const_iterator<enc_vector2> iterator;
+        typedef iterator                                 const_iterator;
+        typedef const value_type                         reference;
+        typedef const value_type                         const_reference;
+        typedef const value_type*                        const_pointer;
+        typedef ptrdiff_t                                difference_type;
+        typedef int_vector<>::size_type                  size_type;
+        typedef t_coder                                  coder;
+        typedef typename enc_vector2_trait<t_width>::int_vector_type int_vector_type;
+        typedef iv_tag                                   index_category;
+        static  constexpr uint32_t                       sample_dens    = t_dens;
+        typedef enc_vector2                              enc_vec_type;
+
+        int_vector<0>     m_z;                       // storage for encoded deltas
+    private:
+        int_vector_type            m_samples;        // samples
+        sd_vector<>                m_pointers;       // marks where the encoded deltas of each block start in m_z
+        sd_vector<>::select_1_type m_pointers_sel;   // select support, bound to m_pointers via set_vector()
+        size_type                  m_size = 0;       // number of vector elements
+
+        void clear()
+        {
+            m_z.resize(0);
+            m_size = 0;
+            m_samples.resize(0);
+            m_pointers = sd_vector<>();
+        }
+
+    public:
+        // m_pointers_sel is bound to m_pointers by address (set_vector). A member-wise copy/move would leave
+        // it bound to the source object's m_pointers, so every copy/move re-binds it to our own member.
+        enc_vector2() = default;
+        enc_vector2(const enc_vector2& v)
+            : m_z(v.m_z), m_samples(v.m_samples), m_pointers(v.m_pointers), m_pointers_sel(v.m_pointers_sel), m_size(v.m_size)
+        {
+            m_pointers_sel.set_vector(&m_pointers);
+        }
+        enc_vector2(enc_vector2&& v) : enc_vector2() { *this = std::move(v); }
+        enc_vector2& operator=(const enc_vector2& v) { return *this = enc_vector2(v); }
+        enc_vector2& operator=(enc_vector2&& v)
+        {
+            if (this != &v) {
+                m_z            = std::move(v.m_z);
+                m_samples      = std::move(v.m_samples);
+                m_pointers     = std::move(v.m_pointers);
+                m_pointers_sel = std::move(v.m_pointers_sel);
+                m_pointers_sel.set_vector(&m_pointers);
+                m_size         = v.m_size;
+            }
+            return *this;
+        }
+
+        //! Constructor for a Container of unsigned integers (only declared; this header holds no definition). \param c A container of unsigned integers.
+        template<class Container>
+        enc_vector2(const Container& c);
+
+        //! Constructor for an int_vector_buffer of unsigned integers. \param v_buf A int_vector_buf.
+        template<uint8_t int_width>
+        enc_vector2(int_vector_buffer<int_width>& v_buf);
+
+        ~enc_vector2() = default;
+
+        //! The number of elements in the enc_vector2.
+        size_type size()const
+        {
+            return m_size;
+        }
+
+        //! Return the largest size that this container can ever have.
+        static size_type max_size()
+        {
+            return int_vector<>::max_size()/2;
+        }
+
+        //!    Returns if the enc_vector2 is empty.
+        bool empty() const
+        {
+            return 0==m_size;
+        }
+
+        //! Iterator that points to the first element of the enc_vector2.
+        const const_iterator begin()const
+        {
+            return const_iterator(this, 0);
+        }
+
+        //! Iterator that points to the position after the last element of the enc_vector2.
+        const const_iterator end()const
+        {
+            return const_iterator(this, this->m_size);
+        }
+
+        //! operator[]. \param i Index. \f$ i \in [0..size()-1]\f$.
+        value_type operator[](size_type i)const;
+
+        //! Serialize the enc_vector2 to a stream.
+        /*! \param out Out stream to write the data structure. \return The number of written bytes. */
+        size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const;
+
+        //! Load the enc_vector2 from a stream.
+        void load(std::istream& in);
+
+        //! Returns the i-th sample of enc_vector2
+        /*! \param i The index of the sample. 0 <= i < size()/get_sample_dens() \return The value of the i-th sample. */
+        value_type sample(const size_type i) const;
+
+        uint32_t get_sample_dens() const
+        {
+            return t_dens;
+        }
+
+        /*! Writes the values between sample i and the next sample, relative to sample i; the first value written is always 0.
+         * \param i The index of the sample for which all values till the next sample should be decoded. 0 <= i < size()/get_sample_dens()
+         * \param it A pointer to a uint64_t vector, whereto the values should be written
+         */
+        void get_inter_sampled_values(const size_type i, uint64_t* it)const
+        {
+            // TODO: this will not work for blocks with m_pointers_sel(i+1)+t_dens==m_pointers_sel(i+2)
+            *(it++) = 0;
+            if (i*t_dens + t_dens - 1 < size()) {
+                if (i+1 < m_samples.size() and m_samples[i] + t_dens == m_samples[i+1]) {
+                    if (m_pointers_sel(i+1) != m_pointers_sel(i+2)) {
+                        throw std::logic_error("Should not be here");
+                    }
+                    for (uint64_t x = 1; x < t_dens; ++x) {   // uniform block: values are 1, 2, ..., t_dens-1
+                        *(it++) = x;
+                    }
+                } else {
+                    t_coder::template decode<true, true>(m_z.data(), m_pointers_sel(i+1), t_dens - 1, it);
+                }
+            } else {
+                assert(i*t_dens < size());
+                t_coder::template decode<true, true>(m_z.data(), m_pointers_sel(i+1), size()-i*t_dens - 1, it);
+            }
+        }
+};
+
+template<class t_coder, uint32_t t_dens, uint8_t t_width>
+inline typename enc_vector2<t_coder, t_dens,t_width>::value_type enc_vector2<t_coder, t_dens,t_width>::operator[](const size_type i)const
+{
+    assert(i+1 != 0);
+    assert(i < m_size);
+    const size_type block = i/get_sample_dens();   // index of the sample that precedes (or is) position i
+    const size_type rel   = i-t_dens*block;        // distance of i from that sample
+    // Consecutive samples that differ by exactly t_dens are treated as a uniform block: the value follows from the sample alone.
+    const bool uniform = block+1 < m_samples.size() and m_samples[block]+t_dens == m_samples[block+1];
+    return m_samples[block] + (uniform ? rel : t_coder::decode_prefix_sum(m_z.data(), m_pointers_sel(block+1), rel));
+}
+
+template<class t_coder, uint32_t t_dens, uint8_t t_width>
+inline typename enc_vector2<t_coder, t_dens,t_width>::value_type enc_vector2<t_coder, t_dens,t_width>::sample(const size_type i)const
+{
+    assert(i*get_sample_dens()+1 != 0);   // rejects an index whose element position would be the all-ones value
+    assert(i*get_sample_dens() < m_size); // the element the i-th sample belongs to must exist
+    return m_samples[i];                  // samples are stored verbatim, no decoding needed
+}
+
+template<class t_coder, uint32_t t_dens, uint8_t t_width>
+template<uint8_t int_width>
+enc_vector2<t_coder, t_dens,t_width>::enc_vector2(int_vector_buffer<int_width>& v_buf)
+{
+    // Builds the vector from v_buf in two passes (size computation, then encoding), starting from a cleared state.
+    clear();
+    size_type n = v_buf.size();
+    if (n == 0)  // if v_buf is empty there is nothing to do...
+        return;
+    value_type     v1=0, v2=0, max_sample_value=0;
+    size_type samples=0, z_size=0;
+    const size_type sd = get_sample_dens();
+    size_type tmp_z = 0;     // encoded size of the deltas of the block currently being scanned
+    bool uniform = true;     // true while every delta of the current block equals 1
+//  (1) Count the samples, find the maximal sample value and sum up the encoded size of all non-uniform blocks
+    for (size_type i=0, no_sample = 0; i < n; ++i, --no_sample) {
+        v2 = v_buf[i];
+        if (!no_sample) { // is sample
+            uniform &= (v2==v1+1);
+            if (!uniform) {   // only non-uniform blocks take space in m_z
+                z_size += tmp_z;
+            }
+            uniform = true;
+            tmp_z = 0;
+            no_sample = sd;
+            if (max_sample_value < v2) max_sample_value = v2;
+            ++samples;
+        } else {
+            uniform &= (v2==v1+1);
+            tmp_z += t_coder::encoding_length(v2-v1);
+        }
+        v1 = v2;
+    }
+    z_size += tmp_z;   // the trailing block is always counted, uniform or not
+
+//  (2) Write sample values and deltas
+//  (a) Initialize array for sample values (one extra entry, left at 0)
+    m_samples = int_vector_type(samples+1, 0, bits::hi(max_sample_value)+1);   // int_vector_type, so fixed-width t_width compiles too
+
+    sd_vector_builder builder(z_size, samples);   // one pointer per sample, positions lie in the bit range of m_z
+
+//  (b) Initialize bit_vector for encoded data
+    m_z = int_vector<>(z_size, 0, 1);
+    uint64_t* z_data = t_coder::raw_data(m_z);
+    uint8_t offset = 0;
+
+//  (c) Write sample values and deltas
+    z_size = 0;
+    tmp_z = 0;
+    uniform = true;
+    std::vector<uint64_t> delta;   // deltas of the current block, buffered until the block is known to be non-uniform
+    for (size_type i=0, j=0, no_sample = 0; i < n; ++i, --no_sample) {
+        v2 = v_buf[i];
+        if (!no_sample) { // is sample
+            uniform &= (v2==v1+1);
+            if (!uniform) {
+                for (size_t k=0; k<delta.size(); ++k) {
+                    z_size += t_coder::encoding_length(delta[k]);
+                    t_coder::encode(delta[k], z_data, offset); // write encoded data
+                }
+            }
+            delta.clear();
+            uniform = true;
+            no_sample = sd;
+            m_samples[j++] = v2;    // write samples
+            builder.set(z_size);    // NOTE(review): a skipped (uniform) block repeats the previous position - confirm sd_vector_builder::set allows that
+        } else {
+            uniform &= (v2==v1+1);
+            delta.push_back(v2-v1);
+        }
+        v1 = v2;
+    }
+    for (size_t k=0; k<delta.size(); ++k) {   // the trailing block is always encoded, matching pass (1)
+        t_coder::encode(delta[k], z_data, offset); // write encoded data
+        z_size += t_coder::encoding_length(delta[k]);
+    }
+    m_size = n;
+    m_pointers = sd_vector<>(builder);
+    m_pointers_sel.set_vector(&m_pointers);   // bind the select support to the freshly built pointer vector
+}
+
+template<class t_coder, uint32_t t_dens, uint8_t t_width>
+enc_vector2<>::size_type enc_vector2<t_coder, t_dens,t_width>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const
+{
+    structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
+    size_type written_bytes = 0;   // accumulates the return values of the individual writes
+    written_bytes += write_member(m_size, out, child, "size");
+    written_bytes += m_z.serialize(out, child, "encoded deltas");
+    written_bytes += m_samples.serialize(out, child, "samples");
+    written_bytes += m_pointers.serialize(out, child, "pointers");
+    written_bytes += m_pointers_sel.serialize(out, child, "pointers_sel");
+    structure_tree::add_size(child, written_bytes);   // record the total on the structure-tree node
+    return written_bytes;
+}
+
+template<class t_coder, uint32_t t_dens, uint8_t t_width>
+void enc_vector2<t_coder, t_dens,t_width>::load(std::istream& in)
+{
+    read_member(m_size, in);   // members are read in the same order in which serialize() writes them
+    m_z.load(in);
+    m_samples.load(in);
+    m_pointers.load(in);
+    m_pointers_sel.load(in);
+    m_pointers_sel.set_vector(&m_pointers);   // bind the loaded select support to this object's m_pointers
+}
+
+} // end namespace sdsl
+#endif
diff --git a/include/sdsl/hyb_sd_vector.hpp b/include/sdsl/hyb_sd_vector.hpp
new file mode 100644
index 000000000..2b78de755
--- /dev/null
+++ b/include/sdsl/hyb_sd_vector.hpp
@@ -0,0 +1,1426 @@
+// Copyright (c) 2016, the SDSL Project Authors.  All rights reserved.
+// Please see the AUTHORS file for details.  Use of this source code is governed
+// by a BSD license that can be found in the LICENSE file.
+/*!\file hyb_sd_vector.hpp
+   \brief hyb_sd_vector.hpp contains block encodings (e.g. sdsl::hyb_sd_block_bv and
+          sdsl::hyb_sd_block_rl) and word-level bit-scanning helpers for a hybrid variant of sd_vector.
+   \author Simon Gog, Matthias Petri
+*/
+#ifndef INCLUDED_SDSL_HYB_SD_VECTOR
+#define INCLUDED_SDSL_HYB_SD_VECTOR
+
+#include "int_vector.hpp"
+#include "sd_vector.hpp"
+#include "coder.hpp"
+#include "util.hpp"
+#include "iterators.hpp"
+
+//! Namespace for the succinct data structure library
+namespace sdsl
+{
+
+template <class t_itr>
+std::string print_vec(t_itr beg, t_itr end)
+{
+    // Renders [beg, end) as "[a b c]". An empty range yields "[]"; the previous version formed
+    // and dereferenced `end - 1` unconditionally, which is undefined when beg == end.
+    std::string str = "[";
+    for (auto itr = beg; itr != end; ++itr) {
+        if (itr != beg) str += " ";   // single space between elements, none before the first
+        str += std::to_string(*itr);
+    }
+    return str + "]";
+}
+
+inline uint64_t next0(const uint64_t* word, uint64_t idx)
+{
+    // Returns the position of the first unset bit strictly after position idx.
+    // There is no end-of-data check: the scan only stops at a word that contains a 0 bit.
+    const uint64_t* cur  = word + (idx >> 6);
+    uint64_t        base = idx & ~((size_t)0x3F);   // position of bit 0 of *cur
+    const uint64_t zeros = ~(*cur | bits::lo_set[(idx & 0x3F) + 1]);
+    if (zeros) {
+        return base + bits::lo(zeros);
+    }
+    for (++cur, base += 64; *cur == 0xFFFFFFFFFFFFFFFFULL; ++cur) {
+        base += 64;
+    }
+    return base + bits::lo(~(*cur));
+}
+
+/*! Counts the set bits among the i bits that start at bit position idx.
+ * \param word Beginning of bit_vector (represented as sequence of uint64_t words)
+ * \param idx  Initial scanning position (in bits)
+ * \param i    Number of bits to inspect, starting at idx
+ * \return The number of set bits in [idx, idx+i); the scan stops early and t_block_size is returned once that many bits were seen
+ */
+template <uint16_t t_block_size>
+inline uint64_t cnt(const uint64_t* word, uint64_t idx, uint64_t i)
+{
+    // move to the word containing bit idx
+    word += (idx >> 6);
+    auto offset = idx & 0x3F;
+    // drop the bits below idx so that bit 0 of w corresponds to position idx
+    uint64_t w = (*word) >> offset;
+    uint64_t pre_considered = 0;
+    uint64_t considered = 64 - offset;
+    // considered: bits covered up to and including w; pre_considered: bits covered before w
+    uint64_t res = 0;
+    while (considered < i) {
+        res += bits::cnt(w);
+        // early exit: the result is capped at t_block_size
+        if (res >= t_block_size) {
+            return t_block_size;
+        }
+        pre_considered = considered;
+        considered += 64;
+        w = *(++word);
+    }
+
+    // here considered >= i, i.e. the range ends inside (or exactly at the end of) w
+    // considered \in [i+0..i+63]
+    if (i == considered) {
+        return res + bits::cnt(w);   // w is needed completely (no cap is applied on this path)
+    }
+    // only the lowest i-pre_considered bits of w belong to the range
+    i = i-pre_considered;
+    w &= bits::lo_set[i];
+    res += bits::cnt(w);
+    return res > t_block_size ? t_block_size : res; // TODO: is the space reduction large enough to justify this  ???
+}
+/*! NOTE(review): i is decremented first and only bits strictly after idx are scanned, so this matches "i-th set bit from idx" only if bit idx itself is set; for i == 1 bits::sel is called with 0 - confirm.
+ * \param word Beginning of bit_vector (represented as sequence of uint64_t words)
+ * \param idx  Initial scanning position (in bits)
+ * \param i    i
+ * \return The absolute position (in bits) of the i-th set bit (\f$ i>0 \f$) from idx
+ */
+inline uint64_t sel(const uint64_t* word, uint64_t idx, uint64_t i)
+{
+    --i;
+    word += (idx >> 6);
+    auto masked_word = *word & ~bits::lo_set[(idx & 0x3F) + 1];   // keep only the bits strictly after idx
+    auto one_cnt = bits::cnt(masked_word);
+    if (one_cnt >= i) {   // the answer lies in the first word
+        return (idx & ~((size_t)0x3F)) + bits::sel(masked_word, i);
+    }
+    idx = (idx & ~((size_t)0x3F)) + 64;   // idx now is the position of bit 0 of the next word
+    i -= one_cnt;
+    ++word;
+    one_cnt = bits::cnt(*word);
+    while (i > one_cnt) {   // skip whole words that hold fewer set bits than still needed
+        ++word;
+        idx += 64;
+        i -= one_cnt;
+        one_cnt = bits::cnt(*word);
+    }
+    return idx + bits::sel(*word, i);
+}
+
+/*! Locates the i-th unset bit at or after bit position idx.
+ * \param word Beginning of bit_vector (represented as sequence of uint64_t words)
+ * \param idx  Initial scanning position (in bits)
+ * \param i    i
+ * \return The position (in bits, counted relative to idx) of the i-th unset bit (\f$ i>0 \f$) from idx
+ */
+inline uint64_t sel0(const uint64_t* word, uint64_t idx, uint64_t i)
+{
+    // move to the word containing bit idx
+    word += (idx >> 6);
+    auto offset = idx & 0x3F;
+    uint64_t w = (~(*word)) >> offset;   // inverted word: unset bits become 1, bit 0 of w is position idx
+    uint64_t considered = 64 - offset;   // number of bits covered up to and including w
+    uint64_t res = 0;                    // number of bits covered before w (relative to idx)
+    uint64_t cnt = 0;                    // unset bits found before w
+    uint64_t word_cnt = bits::cnt(w);
+
+    while (cnt + word_cnt < i) {
+        cnt += word_cnt;
+        res = considered;
+        considered += 64;
+        w = (~(*(++word)));
+        word_cnt = bits::cnt(w);
+    }
+    // cnt < i and cnt+word_cnt >= i
+    // add select (i-cnt) to res
+    res += bits::sel(w, i - cnt);
+    return res;
+}
+
+template <uint64_t t_block_size>
+class hyb_sd_block_bv
+{
+    public:
+        typedef bit_vector::size_type size_type;
+        typedef size_type value_type;
+
+        //! Size estimate for a block spanning u positions: u is returned unchanged.
+        static size_type estimate_size(size_type u)
+        {
+            return u;
+        }
+
+        //! Writes the block as plain bits into bv, starting at bit `offset`; returns data[t_block_size-1]+1.
+        static size_type
+        serialize(bit_vector& bv, size_type offset, int_vector<64>& data, size_type)
+        {
+            const size_type len = data[t_block_size - 1] + 1;   // last entry of the block + 1
+            for (size_t pos = 0; pos < len; ++pos) bv[offset + pos] = 0;
+            for (size_type k = 0; k < t_block_size; ++k) bv[offset + data[k]] = 1;
+            return len;
+        }
+
+        //! Position, relative to `offset`, that sel() reports for argument i+1.
+        static size_type select_1(const bit_vector& bv, size_type offset, size_type i, size_type)
+        {
+            return sel(bv.data(), offset, i + 1) - offset;
+        }
+
+        //! Rank inside block block_id: t_block_size if i exceeds the block length, else cnt<t_block_size>() over the first i bits.
+        static size_type
+        rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, size_type i, size_type)
+        {
+            auto first = block_start[block_id];
+            auto block_len = block_start[block_id+1] - first;
+            return (i > block_len) ? t_block_size : cnt<t_block_size>(bv.data(), first, i);
+        }
+
+        //! Pairwise variant of rank_1 for ij[0] and ij[1] (presumably ij[0] <= ij[1]; the second count resumes at ij[0]).
+        static std::array<size_type,2>
+        rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, const std::array<size_type,2>& ij, size_type)
+        {
+            auto first = block_start[block_id];
+            auto block_len = block_start[block_id+1] - first;
+            if (ij[0] > block_len) {
+                return {{t_block_size,t_block_size}};
+            }
+            auto lo_rank = cnt<t_block_size>(bv.data(), first, ij[0]);
+            if (ij[1] > block_len) {
+                return {{lo_rank, t_block_size}};
+            }
+            return {{lo_rank, lo_rank+cnt<t_block_size>(bv.data(), first+ij[0], ij[1]-ij[0])}};
+        }
+};
+
+
+template <uint64_t t_block_size, typename t_coder=coder::elias_delta>
+class hyb_sd_block_rl
+{
+    public:
+        typedef bit_vector::size_type size_type;
+        typedef size_type value_type;
+
+    public:
+        static size_type encode(int_vector<64>& data, bit_vector* bv=nullptr, size_type offset=0)
+        {   // Gap/run-length encodes data and returns the code length in bits; with bv == nullptr nothing is written (size estimate only).
+            uint64_t* data_ptr = nullptr;
+            uint8_t in_word_offset = offset % 64;
+            if (bv != nullptr) {
+                data_ptr = bv->data() + (offset / 64); // 64-bit word that contains bit `offset`
+            }
+            auto do_encode = [&](uint64_t x) { // hands x to t_coder when a target exists; always returns x's code length
+                if (data_ptr != nullptr) {
+                    t_coder::encode(x, data_ptr, in_word_offset); // presumably advances data_ptr/in_word_offset past the written code -- confirm
+                }
+                return t_coder::encoding_length(x);
+            };
+
+            size_type rl_bits = 0;
+            size_t begin = 0, end = 1; // data[begin..end-1] is the current stretch of values that each grow by 1
+            while (end < data.size()) {
+                uint64_t delta = data[end]-data[begin];
+                if (delta > end-begin) { // data[end] does not continue the stretch
+                    if (end-begin == 1) {
+                        rl_bits += do_encode(delta); // single gap; delta > 1 here, so it never collides with the run marker 1
+                    } else { // end-begin > 1
+                        rl_bits += do_encode(1);                     // marker: a run follows
+                        rl_bits += do_encode(end-1-begin);           // number of +1 steps in the run
+                        rl_bits += do_encode(data[end]-data[end-1]); // gap that terminated the run
+                    }
+                    begin = end;
+                    ++end;
+                } else {
+                    ++end;
+                }
+            }
+            if (end-begin > 1) { // intentionally empty: a trailing run is not stored, decode()/rank_1() read end-of-code as "+1 until the block is full"
+//                rl_bits += do_encode(1);
+//                rl_bits += do_encode(end-1-begin);
+            }
+            /*
+                        if ( bv!=nullptr ) {
+            //                std::cout<<"Checking block "<<std::endl;
+            //                std::cout<<"rl_bits="<<rl_bits<<std::endl;
+            //                std::cout<<"data="<<data<<std::endl;
+                            in_word_offset = offset % 64;
+                            data_ptr = bv->data() + (offset / 64);
+                            auto end_offset = offset + rl_bits;
+                            auto temp = decode(data_ptr, in_word_offset, bv->data() + (end_offset/64), end_offset%64);
+                            if ( data.size() != temp.size() ){
+                                std::cout<<"Error in RL block; size is different!!!"<<std::endl;
+                                throw std::logic_error("error in RL block");
+                            }
+                            for(size_t i=0; i<data.size(); ++i){
+                                if(data[i]!=temp[i]) {
+                                    std::cout<<"Error in RL block; decoded value is different!!!"<<std::endl;
+                                    std::cout<<"data["<<i<<"]="<< data[i] <<" != "<<temp[i]<<" = temp[i]"<<std::endl;
+                                    throw std::logic_error("error in RL block");
+                                }
+                            }
+                        }
+            */
+            return rl_bits;
+        }
+
+        static int_vector<64> decode(const uint64_t* data_ptr, uint8_t offset, const uint64_t* data_ptr_end, uint8_t offset_end)
+        {   // Rebuilds all t_block_size block-local values from the code between (data_ptr, offset) and (data_ptr_end, offset_end).
+            int_vector<64> data(t_block_size, 0);
+            size_t pos = 1; // data[0]=0, now decode for pos > 0
+            while (pos < t_block_size) {
+                if (data_ptr > data_ptr_end or (data_ptr == data_ptr_end and offset >= offset_end)) { // read position reached the end of the code
+//                    std::cout<<"entering corner case"<<std::endl;
+                    while (pos < t_block_size) { // the trailing run is not stored by encode(): fill up with +1 steps
+                        data[pos] = data[pos-1]+1;
+//                        std::cout<<"writing entry "<<pos<<" data[pos]="<<data[pos]<<std::endl;
+                        ++pos;
+                    }
+                    return data;
+                }
+                uint64_t delta = t_coder::decode(data_ptr,offset); // presumably advances data_ptr/offset past the consumed code -- confirm
+                if (delta == 1) {   // encoded run of ones of length >= 1
+                    uint64_t len = t_coder::decode(data_ptr, offset);
+                    for (size_t i=0; i<len; ++i) { // NOTE(review): len is trusted; pos is not re-checked against t_block_size inside this loop
+                        data[pos] = data[pos-1]+1;
+                        ++pos;
+                    }
+                } else { // single delta
+                    data[pos] = data[pos-1]+delta;
+                    ++pos;
+                }
+            }
+            return data;
+        }
+
+        static size_type estimate_size(size_type, int_vector<64>& data)
+        {   // Code length in bits for data; the universe argument is ignored because the length depends on the values themselves.
+            return encode(data); // bv defaults to nullptr, so encode() only measures and writes nothing
+        }
+
+        static size_type
+        serialize(bit_vector& bv, size_type offset, int_vector<64>& data, size_type)
+        {   // Writes the code for data into bv starting at bit `offset`; returns the number of code bits; the universe argument is unused.
+            return encode(data, &bv, offset);
+        }
+
+        // Returns entry i of the decoded block. The whole block is decoded on every call; block_start supplies both ends of the code.
+        static size_type
+        select_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, size_type i, size_type)
+        {
+            auto offset_begin = block_start[block_id];   // first code bit of this block
+            auto offset_end = block_start[block_id+1];   // first code bit of the next block
+            return decode(bv.data()+(offset_begin/64), offset_begin%64, bv.data()+(offset_end/64), offset_end%64)[i];
+        }
+
+        static size_type rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, size_type i, size_type)
+        {   // Counts the block values smaller than i by streaming through the code, without materialising the decoded block.
+            auto abs_offset = block_start[block_id];
+            auto data_ptr = bv.data()+(abs_offset/64);
+            uint8_t offset = abs_offset%64;
+            auto abs_offset_end = block_start[block_id+1];
+            auto data_ptr_end = bv.data()+(abs_offset_end/64);
+            uint8_t offset_end = abs_offset_end%64;
+
+            uint64_t data_res = 0; // while the loop keeps running: the value of the next entry that has not been counted yet
+            size_t res = 0; // data[0]=0, now decode for pos > 0
+            while (res < t_block_size and i > data_res) {
+                if (data_ptr > data_ptr_end or (data_ptr == data_ptr_end and offset >= offset_end)) { // code exhausted: the rest is the unstored trailing run
+                    uint64_t len = t_block_size - res; // all remaining entries grow by 1
+                    data_res += len;
+                    if (i > data_res) {
+                        res += len;
+                    } else { // i <= data_res and i > data_res - len
+                        uint64_t gap = i - (data_res - len); // entries of the run that are still smaller than i
+                        res += gap;
+                    }
+                } else {
+                    uint64_t delta = t_coder::decode(data_ptr,offset);
+                    if (delta == 1) {   // encoded run of ones of length >= 1
+                        uint64_t len = t_coder::decode(data_ptr, offset);
+                        data_res += len; // jump to the value at the end of the run
+                        if (i > data_res) {
+                            res += len;
+                        } else { // i <= data_res and i > data_res - len
+                            uint64_t gap = i - (data_res - len); // i falls inside the run: count only the part below i
+                            res += gap;
+                        }
+                    } else { // single delta
+                        data_res += delta;
+                        ++res;
+                    }
+                }
+            }
+//            auto check_res = trivial_rank_1(bv, block_start, block_id, i, 0);
+//            if ( check_res != res ) {
+//                std::cout<<"block_id="<<block_id<<" res="<<res<<" check_res="<<check_res<<std::endl;
+//            }
+            return res;
+        }
+
+        static std::array<size_type,2>
+        rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, const std::array<size_type,2>& ij, size_type)
+        {   // Two-position variant: one pass over the code answers ij[0] and then ij[1]; the shortcuts rely on ij[0] <= ij[1].
+            auto abs_offset = block_start[block_id];
+            auto data_ptr = bv.data()+(abs_offset/64);
+            uint8_t offset = abs_offset%64;
+            auto abs_offset_end = block_start[block_id+1];
+            auto data_ptr_end = bv.data()+(abs_offset_end/64);
+            uint8_t offset_end = abs_offset_end%64;
+
+            uint64_t data_res = 0; // shared by both passes: the decode position is never rewound
+            std::array<size_type,2> res = {{0,0}}; // data[0]=0, now decode for pos > 0
+            size_t k=0;
+            for (; k<2; ++k) { // k = 0 answers ij[0]; k = 1 continues from the same code position for ij[1]
+                while (res[k] < t_block_size and ij[k] > data_res) {
+                    if (data_ptr > data_ptr_end or (data_ptr == data_ptr_end and offset >= offset_end)) { // code exhausted: unstored trailing run
+                        uint64_t len = t_block_size - res[k];
+                        data_res += len;
+                        if (ij[k] > data_res) {
+                            res[k] += len;
+                        } else { // ij[k] <= data_res and i > data_res - len
+                            uint64_t gap = ij[k] - (data_res -len);
+                            res[k] += gap;
+                            if (k == 0) { // the run was consumed while answering ij[0]; settle ij[1] against the same run
+                                if (ij[1] <= data_res) {
+                                    res[1] = res[0] + (ij[1]-ij[0]); // both positions fall into this run
+                                    k = 3; break; // k = 3 also terminates the enclosing for loop
+                                } else {
+                                    res[1] = res[0] - gap + len; // ij[1] is past the run: credit the whole run to res[1]
+                                }
+                            }
+                        }
+                    } else {
+                        uint64_t delta = t_coder::decode(data_ptr,offset);
+                        if (delta == 1) {   // encoded run of ones of length >= 1
+                            uint64_t len = t_coder::decode(data_ptr, offset);
+                            data_res += len;
+                            if (ij[k] > data_res) {
+                                res[k] += len;
+                            } else { // ij[k] <= data_res and i > data_res-len
+                                uint64_t gap = ij[k] - (data_res-len);
+                                res[k] += gap;
+                                if (k == 0) { // same hand-over as in the end-of-code branch above
+                                    if (ij[1] <= data_res) {
+                                        res[1] = res[0] + (ij[1]-ij[0]);
+                                        k = 3; break; // k = 3 also terminates the enclosing for loop
+                                    } else {
+                                        res[1] = res[0] - gap + len;
+                                    }
+                                }
+                            }
+                        } else { // single delta
+                            data_res += delta;
+                            ++res[k];
+                        }
+                    }
+                }
+                if (k==0) {
+                    res[1]= std::max(res[0], res[1]); // the second pass starts from at least the count reached for ij[0]
+                }
+            }
+            /*
+                        std::array<size_type,2> check = { rank_1(bv, block_start, block_id, ij[0], 0),
+                                                          rank_1(bv, block_start, block_id, ij[1], 0)};
+                        if ( res != check ){
+                            std::cerr<<"res!=check"<<std::endl;
+                            std::cout<<"res=["<<res[0]<<","<<res[1]<<"] != ["<<check[0]<<","<<check[1]<<"] k="<<k << std::endl;
+            //                throw std::logic_error("check failed");
+                            return check;
+                        }
+            */
+            return res;
+        }
+
+
+
+        static size_type
+        trivial_rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, size_type i, size_type)
+//        rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, size_type i, size_type)
+        {   // Reference implementation: decodes the whole block and counts the leading entries smaller than i; the last argument is unused.
+            auto offset_begin = block_start[block_id];
+            auto offset_end = block_start[block_id+1];
+            auto data = decode(bv.data()+(offset_begin/64), offset_begin%64, bv.data()+(offset_end/64), offset_end%64);
+            size_type res = 0;
+            while (res < data.size() and i > data[res]) // linear scan; stops at the first entry that is not smaller than i
+                ++res;
+            return res;
+        }
+
+        /*
+                static std::array<size_type,2>
+                rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, const std::array<size_type,2>& ij, size_type)
+                {
+                    auto offset = block_start[block_id];
+                    auto data = decode(bv.data()+(offset/64), offset%64);
+                    std::array<size_type,2> res = {0,0};
+                    while ( res[0] < data.size() and ij[0] > data[res[0]] )
+                        ++res[0];
+                    res[1] = res[0];
+                    while ( res[1] < data.size() and ij[1] > data[res[1]] )
+                        ++res[1];
+                    return res;
+                }
+        */
+};
+
+
+
+// Codec for a "full" block: the ones sit at local positions 0..t_block_size-1, so no payload is stored.
+template <uint64_t t_block_size>
+class hyb_sd_block_full
+{
+    public:
+        typedef bit_vector::size_type size_type;
+        typedef size_type value_type;
+
+    public:
+        // Size estimate in bits: 0 when the universe u equals t_block_size, otherwise the largest size_type value.
+        static size_type estimate_size(size_type u)
+        {
+            if (u != t_block_size) return std::numeric_limits<size_type>::max();
+            return 0;
+        }
+
+        // Stores nothing and reports 0 written bits; prints a warning on stderr when u differs from t_block_size.
+        static size_type serialize(bit_vector&, size_type, int_vector<64>&, size_type u)
+        {
+            if (u != t_block_size) std::cerr << "this should not happen!" << std::endl;
+            return 0;
+        }
+
+        // The one with index i is located at local position i.
+        static size_type select_1(const bit_vector&, size_type, size_type i, size_type) { return i; }
+
+        // Rank for local position i: i itself, capped at t_block_size.
+        static size_type rank_1(const bit_vector&, const int_vector<>&, size_type, size_type i, size_type)
+        {
+            return (i < t_block_size) ? i : t_block_size;
+        }
+};
+
+template <uint64_t t_block_size>
+class hyb_sd_block_ef
+{
+    public:
+        typedef bit_vector::size_type size_type;
+        typedef size_type value_type;
+
+    private:
+        // TODO factor out calculation of logu and logm
+
+    public:
+        static size_type estimate_size(size_type u)
+        {   // Size bound in bits for storing t_block_size values from a universe of size u with the low/high split used by serialize().
+            uint8_t logu = bits::hi(u) + 1;            // index of u's highest set bit, plus one
+            uint8_t logm = bits::hi(t_block_size) + 1; // TODO constexpr for hi?
+            if (logm == logu)
+                logm--;                                // same adjustment as in serialize/select_1/rank_1, so all four agree on width_low
+            size_type width_low = logu - logm;
+            size_type size_in_bits = width_low * t_block_size + (1ULL << logm) + t_block_size + 1; // low parts + room for 2^logm zeros + one 1-bit per value + final 0
+            return size_in_bits;
+        }
+        static size_type
+        serialize(bit_vector& bv, size_type offset, int_vector<64>& data, size_type u)
+        {   // Writes the block at bit `offset` of bv: first all low parts, then the high parts as gap-coded bits; returns the bits written.
+            size_type written_bits = 0;
+            uint8_t logu = bits::hi(u) + 1;
+            uint8_t logm = bits::hi(t_block_size) + 1;
+            if (logm == logu)
+                logm--;
+            size_type width_low = logu - logm; // number of low bits stored verbatim per value
+            /* write low */
+            auto data_ptr = bv.data() + (offset / 64);
+            uint8_t in_word_offset = offset % 64;
+            for (size_type i = 0; i < t_block_size; i++) {
+                uint64_t x = data[i];
+                bits::write_int_and_move(data_ptr, x, in_word_offset, width_low); // presumably stores only the lowest width_low bits of x and advances the cursor -- confirm
+            }
+            written_bits += width_low * t_block_size;
+
+            /* write high */
+            size_type last_high = 0;
+            for (size_type i = 0; i < t_block_size; i++) {
+                uint64_t x = data[i];
+                size_type cur_high = x >> width_low;
+                size_type write_val = cur_high - last_high; // number of zeros to emit before this value's 1-bit
+                while (write_val >= 64) { // a single write handles at most 64 bits, so long zero stretches go out in full words
+                    bits::write_int_and_move(data_ptr, 0ULL, in_word_offset, 64);
+                    write_val -= 64;
+                    written_bits += 64;
+                }
+                bits::write_int_and_move(data_ptr, 1ULL << write_val, in_word_offset, write_val + 1); // write_val zeros followed by a single 1
+                last_high = cur_high;
+                written_bits += write_val + 1;
+            }
+            bv[offset+written_bits] = 0; // closing zero after the last 1-bit
+            ++written_bits;
+            return written_bits;
+        }
+
+        static size_type select_1(const bit_vector& bv, size_type offset, size_type i, size_type u)
+        {   // Reassembles the value with 0-based index i from its stored low part and its position in the high part.
+            uint8_t logu = bits::hi(u) + 1;
+            uint8_t logm = bits::hi(t_block_size) + 1;
+            if (logm == logu)
+                logm--;
+            size_type width_low = logu - logm;
+            size_type hi_part_offset = offset + t_block_size * width_low; // the high part starts right after the t_block_size low parts
+            size_type low_part_offset = offset + i * width_low;           // low part of value i
+
+            auto low_part_data_ptr = bv.data() + (low_part_offset / 64);
+            uint8_t low_part_in_word_offset = low_part_offset % 64;
+            auto low_part = bits::read_int(low_part_data_ptr, low_part_in_word_offset, width_low);
+
+            auto bucket = sel(bv.data(), hi_part_offset, i + 1) - hi_part_offset - i; // zeros in front of the (i+1)-th 1-bit; assumes sel() returns an absolute position -- confirm
+            return (bucket << width_low) | low_part;
+        }
+
+        static size_type rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, size_type i, size_type u)
+        {
+            // Counts the block values smaller than i: select the zero that closes i's bucket, then walk backwards over that bucket.
+            auto offset = block_start[block_id];
+            auto next_offset = block_start[block_id + 1];
+
+            uint8_t logu = bits::hi(u) + 1;
+            uint8_t logm = bits::hi(t_block_size) + 1;
+            if (logm == logu)
+                logm--;
+            size_type width_low = logu - logm;
+
+            size_type hi_part_offset = offset + t_block_size * width_low;
+            size_type hi_size = next_offset - hi_part_offset; // number of bits in the high part
+
+            size_type high_val = (i >> width_low);            // bucket that i belongs to
+            size_type zeros_in_high = hi_size - t_block_size; // the high part holds exactly t_block_size ones
+            if (zeros_in_high < high_val+1) {                 // no such zero stored: i lies behind every value
+                return t_block_size;
+            }
+            size_type local_sel = sel0(bv.data(), hi_part_offset, high_val + 1); // used below as a position relative to hi_part_offset -- confirm against sel0()
+
+            size_type sel_high = local_sel;
+            size_type rank_low = sel_high - high_val; // ones in front of that zero = values whose bucket is <= high_val
+            if (0 == rank_low) {
+                return 0;
+            }
+
+            size_type low_part_offset = offset + rank_low * width_low;
+            size_type val_low = i & bits::lo_set[width_low];
+            auto low_part_data_ptr = bv.data() + (low_part_offset / 64);
+            uint8_t low_part_in_word_offset = low_part_offset % 64;
+
+            // Step back one high bit (and one low part) per iteration.
+            do {
+                if (!sel_high)
+                    return 0; // reached the start of the high part without finding a smaller value
+                --rank_low;
+                --sel_high;
+                low_part_offset -= width_low;
+                low_part_data_ptr = bv.data() + (low_part_offset / 64);
+                low_part_in_word_offset = low_part_offset % 64;
+                // The loop continues while the bit just stepped onto is a 1
+                // (a value of bucket high_val) whose low part is >= val_low,
+                // i.e. a value that is not smaller than i. It stops at the
+                // first value below i, or at a 0 bit, which means that
+                // bucket high_val has no further values. In both cases
+                // rank_low + 1 values precede i.
+            } while (bv[hi_part_offset + sel_high] and bits::read_int(low_part_data_ptr, low_part_in_word_offset, width_low) >= val_low);
+            return rank_low + 1;
+        }
+
+        // Two-position rank on an Elias-Fano style block: returns, for k = 0 and 1,
+        // the number of block values that are smaller than ij[k].
+        //   bv, block_start, block_id  locate the payload: t_block_size low parts
+        //                              (width_low bits each) followed by the high part
+        //   ij                         the two query positions; the shortcuts below
+        //                              rely on ij[0] <= ij[1]
+        //   u                          block universe; fixes the low/high split the
+        //                              same way serialize() does
+        // NOTE(review): sel0() results are used as positions relative to the offset
+        // passed to it -- confirm against its definition.
+        static std::array<size_type,2>
+        rank_1(const bit_vector& bv, const int_vector<>& block_start, size_type block_id, std::array<size_type,2> ij, size_type u)
+        {
+            auto start_offset = block_start[block_id];
+            auto next_offset = block_start[block_id + 1];
+
+            uint8_t logu = bits::hi(u) + 1;
+            uint8_t logm = bits::hi(t_block_size) + 1;
+            if (logm == logu)
+                logm--;
+            size_type width_low = logu - logm;
+
+            size_type hi_part_offset = start_offset + t_block_size * width_low;
+            size_type hi_size = next_offset - hi_part_offset;
+
+            std::array<size_type,2> high_val = {{(ij[0] >> width_low),(ij[1] >> width_low)}};
+
+            // The high part holds exactly t_block_size ones; all other bits are zeros.
+            size_type zeros_in_high = hi_size - t_block_size;
+            if (zeros_in_high < high_val[0]+1) {    // no zero to select: every value is smaller
+                return {{t_block_size, t_block_size}};
+            }
+            std::array<size_type,2> res = {{0,0}};
+            std::array<size_type,2> local_sel = {{0,0}};
+            local_sel[0]= sel0(bv.data(), hi_part_offset, high_val[0] + 1);
+            if (high_val[0] == high_val[1]) {
+                local_sel[1] = local_sel[0];
+            } else if (zeros_in_high < high_val[1]+1) {
+                // No zero to select for ij[1]: its answer is final right away, and
+                // done1 below keeps the refinement loop away from local_sel[1].
+                res = {{0, t_block_size}};
+            } else {
+                // Continue the zero-select right behind the first hit instead of
+                // scanning the high part from its start again.
+                size_type skip = local_sel[0]+1;
+                local_sel[1] = sel0(bv.data(), hi_part_offset+skip, high_val[1]-high_val[0]) + skip;
+            }
+
+            bool done1 = (res[1]==t_block_size);
+            // Refine k = 1 first (unless it is already final), then k = 0, so that an
+            // early exit for k = 0 can hand back the finished res[1].
+            for (size_t k=1-done1, s=done1; s<2; ++s, --k) {
+                size_type sel_high = local_sel[k];
+                size_type rank_low = sel_high - high_val[k]; // ones in front of the selected zero
+                if (0 == rank_low) {
+                    return {{0,res[1]}};
+                }
+
+                size_type low_part_offset = start_offset + rank_low * width_low;
+                size_type val_low = ij[k] & bits::lo_set[width_low];
+                auto low_part_data_ptr = bv.data() + (low_part_offset / 64);
+                uint8_t low_part_in_word_offset = low_part_offset % 64;
+
+                // Walk backwards over the values of bucket high_val[k] until one is
+                // smaller than ij[k] or the bucket (a 0 bit) ends.
+                do {
+                    if (!sel_high) {
+                        return {{0, res[1]}};
+                    }
+                    --rank_low;
+                    --sel_high;
+                    low_part_offset -= width_low;
+                    low_part_data_ptr = bv.data() + (low_part_offset / 64);
+                    low_part_in_word_offset = low_part_offset % 64;
+                } while (bv[hi_part_offset + sel_high] and bits::read_int(low_part_data_ptr, low_part_in_word_offset, width_low) >= val_low);
+                res[k] = rank_low+1;
+            }
+            return res;
+        }
+};
+
+template <uint8_t t_b,
+          class t_hyb_sd_bv>
+class select_support_hyb_sd;
+
+template <uint8_t t_b,
+          class t_hyb_sd_bv>
+class rank_support_hyb_sd;
+
+template <uint16_t t_block_size = 128>
+class hyb_sd_vector
+{
+    public:
+        typedef bit_vector::size_type size_type;
+        typedef bool value_type;
+        typedef bit_vector::difference_type difference_type;
+        typedef random_access_const_iterator<hyb_sd_vector> iterator;
+        typedef iterator const_iterator;
+        typedef bv_tag index_category;
+
+        // typedef rank_support_hyb_sd<0, hyb_sd_vector> rank_0_type;
+        typedef rank_support_hyb_sd<1, hyb_sd_vector> rank_1_type;
+        // typedef select_support_hyb_sd<0, hyb_sd_vector> select_0_type;
+        typedef select_support_hyb_sd<1, hyb_sd_vector> select_1_type;
+
+    private:
+        sd_vector<> m_top;                        // top level: position of the first one of every block, plus a terminating entry
+        sd_vector<>::select_1_type m_top_sel;     // select support bound to m_top
+        sd_vector<>::rank_1_type m_top_rank;      // rank support bound to m_top
+        bit_vector m_bottom;                      // concatenated payloads of all blocks
+        int_vector<> m_block_start;               // bit offset of each block's payload in m_bottom, plus one end entry
+        int_vector<2> m_block_type;               // hyb_sd_blocktype of each block, stored in 2 bits
+
+        size_type m_size = 0;                     // length of the represented bit vector
+        size_type m_num_ones = 0;                 // number of ones it contains
+
+
+    public:
+        static constexpr uint16_t block_size = t_block_size;
+
+    private:
+        enum class hyb_sd_blocktype : uint8_t { // values are stored in m_block_type (2 bits wide), so they must stay within 0..3
+            EF=0,   // handled by hyb_sd_block_ef
+            BV=1,   // handled by hyb_sd_block_bv
+            FULL=2, // nothing stored for the block
+            RL=3    // handled by hyb_sd_block_rl
+        };
+
+        // Picks the encoding for one block and returns {type, estimated size in bits}.
+        // u is the block universe, data holds the t_block_size block-local positions.
+        std::pair<hyb_sd_blocktype, size_type>
+        determine_block_type(size_t u, int_vector<64>& data) const
+        {
+            // Last value t_block_size-1 (or universe t_block_size): nothing to store.
+            if (u == t_block_size or data[t_block_size-1] == t_block_size-1)
+                return { hyb_sd_blocktype::FULL, 0 };
+            // Exactly three candidates: a fixed-size std::array avoids a heap
+            // allocation per block. Sorting orders by size first, then by type.
+            std::array<std::pair<size_type, hyb_sd_blocktype>, 3> size_and_type = {{
+                {hyb_sd_block_ef<t_block_size>::estimate_size(u), hyb_sd_blocktype::EF},
+                {hyb_sd_block_bv<t_block_size>::estimate_size(u), hyb_sd_blocktype::BV},
+                {hyb_sd_block_rl<t_block_size>::estimate_size(u, data), hyb_sd_blocktype::RL}
+            }};
+            std::sort(size_and_type.begin(), size_and_type.end());
+            // RL wins only when the runner-up is at least twice its size; otherwise the runner-up is taken.
+            if (size_and_type[0].second == hyb_sd_blocktype::RL and
+                2*size_and_type[0].first > size_and_type[1].first) {
+                std::swap(size_and_type[0], size_and_type[1]);
+            }
+            return {size_and_type[0].second, size_and_type[0].first};
+        }
+
+        size_type compress_block(size_type i, int_vector<64>& data, size_t u)
+        {   // Chooses an encoding for block i, records it in m_block_type and appends the payload to m_bottom; returns the bits written.
+            size_type offset = m_block_start[i]; // the caller has already stored the start offset of block i
+            size_type written_bits = 0;
+            auto bt = determine_block_type(u, data);
+            auto type = bt.first;
+            auto size_in_bits = bt.second;
+            m_block_type[i] = static_cast<uint8_t>(type);
+            if (m_bottom.size() < offset + size_in_bits) { // estimated payload does not fit
+                m_bottom.resize(m_bottom.size() * 2 + size_in_bits); // grow geometrically; the constructor trims m_bottom at the end
+            }
+            switch (type) {
+                case hyb_sd_blocktype::BV:
+                    written_bits = hyb_sd_block_bv<t_block_size>::serialize(m_bottom, offset, data, u);
+                    break;
+                case hyb_sd_blocktype::EF:
+                    written_bits = hyb_sd_block_ef<t_block_size>::serialize(m_bottom, offset, data, u);
+                    break;
+                case hyb_sd_blocktype::FULL:
+                    /* nothing to store */
+                    break;
+                case hyb_sd_blocktype::RL:
+                    written_bits = hyb_sd_block_rl<t_block_size>::serialize(m_bottom, offset, data, u);
+                    break;
+            }
+            return written_bits; // stays 0 for FULL blocks
+        }
+
+    public:
+
+        hyb_sd_vector() {} // empty vector: m_size and m_num_ones keep their in-class initialisers (0)
+
+        hyb_sd_vector(const hyb_sd_vector& vec) { // copy constructor
+            *this = vec; // delegates to copy assignment, which also re-binds the top-level supports to this object's m_top
+        }
+
+        hyb_sd_vector(hyb_sd_vector&& vec) { // move constructor
+            *this = std::move(vec); // delegates to move assignment, which also re-binds the top-level supports to this object's m_top
+        }
+
+        // Copy assignment: copies every member and points the copied top-level supports at this object's own m_top.
+        hyb_sd_vector& operator=(const hyb_sd_vector& vec){
+            if (this == &vec) return *this; // self-assignment: nothing to do
+            m_top = vec.m_top;
+            m_top_sel = vec.m_top_sel;
+            m_top_sel.set_vector(&m_top);
+            m_top_rank = vec.m_top_rank;
+            m_top_rank.set_vector(&m_top);
+            m_bottom = vec.m_bottom;
+            m_block_start = vec.m_block_start;
+            m_block_type = vec.m_block_type;
+            m_size = vec.m_size;
+            m_num_ones = vec.m_num_ones;
+            return *this;
+        }
+
+
+        // Move assignment: takes over vec's members and points the moved top-level supports at this object's own m_top.
+        hyb_sd_vector& operator=(hyb_sd_vector&& vec){
+            if (this == &vec) return *this; // self-move: nothing to do
+            m_top = std::move(vec.m_top);
+            m_top_sel = std::move(vec.m_top_sel);
+            m_top_sel.set_vector(&m_top);
+            m_top_rank = std::move(vec.m_top_rank);
+            m_top_rank.set_vector(&m_top);
+            m_bottom = std::move(vec.m_bottom);
+            m_block_start = std::move(vec.m_block_start);
+            m_block_type = std::move(vec.m_block_type);
+            m_size = vec.m_size;           // plain copies: vec keeps its old values
+            m_num_ones = vec.m_num_ones;
+            return *this;
+        }
+
+///*
+        explicit hyb_sd_vector(const bit_vector& bv) //: hyb_sd_vector(bv.ones_begin(),bv.ones_end(),bv.size())
+        {   // Builds the two-level structure from a plain bit_vector: ones are grouped into blocks of t_block_size, each compressed separately.
+            if (bv.size() == 0) {
+                return; // stays in the default (empty) state
+            }
+            m_size = bv.size();
+            m_num_ones = select_support_trait<1, 1>::arg_cnt(bv); // presumably the number of set bits in bv -- confirm
+            if (m_num_ones == 0) {
+                return; // no blocks to build
+            }
+            size_type num_full_blocks = m_num_ones / t_block_size;
+            size_type num_blocks = num_full_blocks;
+            size_type num_leftover = m_num_ones % t_block_size;
+            size_type num_dummy = (t_block_size - num_leftover) % t_block_size; // padding entries needed to fill the last block
+            bool has_leftover_block = num_leftover != 0;
+            if (has_leftover_block) {
+                num_blocks++;
+            }
+
+            // (1) fill the top level
+            std::vector<uint64_t> top_lvl;
+            size_t one_found = t_block_size - 1; // we always want to add the first one!
+            size_type last_one = 0;
+            for (size_type i = 0; i < bv.size(); i++) {
+                if (bv[i] == 1) {
+                    last_one = i;
+                    if (++one_found == t_block_size) { // i is the first one of a new block
+                        top_lvl.push_back(i);
+                        //top_lvl.push_back(i-t_block_size*top_lvl.size());
+                        one_found = 0;
+                    }
+                }
+            }
+            // terminate the top level so top[i+1] - top[i] always works
+//        top_lvl.push_back(last_one + num_dummy + 1);
+            top_lvl.push_back(std::max(bv.size(), last_one+1) + num_dummy);
+
+            // (2) bottom level
+            m_block_start.resize(num_blocks + 1);
+            m_block_type.resize(num_blocks);
+            size_type value_offset = 0;
+            size_type written_bits = 0;
+            int_vector<64> tmp_data(t_block_size);
+            size_t j = 0; // scan position in bv; carried over from block to block
+            for (size_type i = 0; i < num_blocks; i++) {
+                m_block_start[i] = written_bits;
+                // (2a) compute block data
+                value_offset = top_lvl[i]; // block-local positions are relative to the block's first one
+                one_found = 0;
+                while (one_found != t_block_size) {
+                    if (j <= last_one) {
+                        if (bv[j] == 1) {
+                            tmp_data[one_found++] = j - value_offset;
+                        }
+                    } else { // past the last real one: pad the final block with dummy positions
+                        tmp_data[one_found] = std::max(m_size-value_offset, tmp_data[one_found - 1] + 1); // at or beyond the end of bv and strictly increasing
+                        one_found++;
+                    }
+                    ++j;
+                }
+                // (2b) compress block
+                size_type block_universe = top_lvl[i + 1] - top_lvl[i];
+                auto wb = compress_block(i, tmp_data, block_universe);
+                written_bits += wb;
+            }
+            m_block_start[num_blocks] = written_bits; // end entry, so block_start[id+1] is valid for the last block
+            m_bottom.resize(written_bits);            // drop the slack left by compress_block's geometric growth
+
+            // (3) encode the top level
+            m_top = decltype(m_top)(top_lvl.begin(), top_lvl.end());
+            m_top_sel = decltype(m_top_sel)(&m_top);
+            m_top_rank = decltype(m_top_rank)(&m_top);
+
+            // (4) bit compress pointers
+            util::bit_compress(m_block_start);
+        }
+//*/
+        //! Builds the vector from the sorted positions of the set bits in [begin,end); bv_size is the vector length (0: last position + 1).
+        template <class t_itr>
+        hyb_sd_vector(const t_itr begin, const t_itr end, size_type bv_size = 0)
+        {
+            if (begin == end and bv_size==0) { // no positions and no size: return before any member is assigned
+                return;
+            }
+            if (!is_sorted(begin, end)) { // unsorted input is rejected with std::runtime_error
+                throw std::runtime_error("hyb_sd_vector: source list is not sorted.");
+            }
+            m_size = bv_size;
+            if (bv_size == 0) // size not given: derive it from the largest position
+                m_size = *(end - 1) + 1;
+            m_num_ones = std::distance(begin, end);
+            if (m_num_ones==0) { // explicit size but no set bits: only m_size and m_num_ones are set
+                return;
+            }
+//std::cout<<"m_num_ones="<<m_num_ones<<std::endl;
+            size_type num_full_blocks = m_num_ones / t_block_size;
+            size_type num_blocks = num_full_blocks;
+            size_type num_leftover = m_num_ones % t_block_size; // positions in the last, partially filled block
+            size_type num_dummy = (t_block_size - num_leftover) % t_block_size; // padding entries that complete that block
+            bool has_leftover_block = num_leftover != 0;
+            if (has_leftover_block) {
+                num_blocks++;
+            }
+
+            // (1) fill the top level
+            std::vector<uint64_t> top_lvl;
+            auto itr = begin;
+            while (itr < end) { // record the first position of every block
+                top_lvl.push_back(*itr);
+                itr += t_block_size;
+            }
+            // terminate the top level so top[i+1] - top[i] always works
+            top_lvl.push_back(std::max(bv_size, *(end - 1)+1) + num_dummy);
+
+            // (2) bottom level
+            m_block_start.resize(num_blocks + 1);
+            m_block_type.resize(num_blocks);
+//std::cout<<"num_blocks="<<num_blocks<<std::endl;
+            itr = begin;
+            size_type value_offset = 0;
+            size_type written_bits = 0;
+            int_vector<64> tmp_data(t_block_size); // block-relative values of the block being built
+            for (size_type i = 0; i < num_blocks; i++) {
+                m_block_start[i] = written_bits;
+                // (2a) compute block data
+                value_offset = top_lvl[i]; // values are stored relative to the block's first position
+                for (size_type j = 0; j < t_block_size; j++) {
+                    if (itr == end) { // input exhausted: pad with increasing dummies of at least m_size-value_offset
+                        tmp_data[j] = std::max(m_size-value_offset, tmp_data[j - 1] + 1);
+                    } else {
+                        tmp_data[j] = *itr - value_offset;
+                        ++itr;
+                    }
+                }
+
+                // (2b) compress block
+                size_type block_universe = top_lvl[i + 1] - top_lvl[i];
+                // std::cout << "compress block " << i << std::endl;
+                auto wb = compress_block(i, tmp_data, block_universe); // wb is accumulated below as the block's size in bits
+                written_bits += wb;
+            }
+            m_block_start[num_blocks] = written_bits; // end marker behind the last block
+            m_bottom.resize(written_bits);
+
+            // (3) encode the top level
+            m_top = decltype(m_top)(top_lvl.begin(), top_lvl.end());
+            m_top_sel = decltype(m_top_sel)(&m_top);
+            m_top_rank = decltype(m_top_rank)(&m_top);
+
+            // (4) bit compress pointers
+            util::bit_compress(m_block_start);
+//std::cout<<"bye hyb_sd"<<std::endl;
+        }
+
+        //! Returns the bit at position i, computed as rank_1(i+1) - rank_1(i).
+        value_type operator[](size_type i)const
+        {
+            // The pair is passed in descending order, so ranks == {rank_1(i+1), rank_1(i)}.
+            auto ranks = rank_1({{i+1,i}});
+            return ranks[0]-ranks[1];
+        }
+
+        //! Returns the position of the i-th set bit, i counted from 1; i is not range-checked here.
+        size_type select_1(size_type i) const
+        {
+            i = i - 1; // continue with a 0-based index
+            auto block_id = i / t_block_size;
+            auto in_block_offset = i % t_block_size;
+            auto top_value = m_top_sel(block_id + 1); // first position of the block
+            size_type res = top_value;
+            // the first element of a block is answered from the top level alone
+            if (in_block_offset == 0)
+                return res;
+
+            auto u = m_top_sel(block_id + 2) - top_value; // distance to the next top-level entry
+            /*
+            auto bt = determine_block_type(u);
+            auto block_type = bt.first;
+            */
+
+            auto block_type = static_cast<hyb_sd_blocktype>(m_block_type[block_id]);
+            size_type block_offset = m_block_start[block_id]; // bit offset of the block in m_bottom
+
+            switch (block_type) { // the block-local result is added to the block's first position
+                case hyb_sd_blocktype::BV:
+                    res += hyb_sd_block_bv<t_block_size>::select_1(m_bottom, block_offset, in_block_offset, u);
+                    break;
+                case hyb_sd_blocktype::EF:
+                    res += hyb_sd_block_ef<t_block_size>::select_1(m_bottom, block_offset, in_block_offset, u);
+                    break;
+                case hyb_sd_blocktype::FULL:
+                    res += hyb_sd_block_full<t_block_size>::select_1(m_bottom, block_offset, in_block_offset, u);
+                    break;
+                case hyb_sd_blocktype::RL: // RL receives the start table and the block id instead of the offset
+                    res += hyb_sd_block_rl<t_block_size>::select_1(m_bottom, m_block_start, block_id, in_block_offset, u);
+                    break;
+            }
+            return res;
+        }
+        //! Returns the number of set bits in the prefix [0..i-1]; yields m_num_ones when i > m_size or when no bit is set.
+        size_type rank_1(size_type i) const
+        {
+//            bool debug = false;
+//            if ( i==2075 or i==2076) {
+//                debug = true;
+//            }
+//std::cout<<"!!! rank_1("<<i<<")"<<std::endl;
+            if (i > m_size or m_num_ones == 0) {
+//std::cout<<"!!! i > m_size "<<i<<" > "<<m_size<<std::endl;
+                return m_num_ones;
+            }
+            auto block_id = m_top_rank(i); // presumably the number of top-level entries (block heads) below i - confirm
+            if (block_id == 0) { // nothing counted by the top level: the result is 0
+//std::cout<<"!!! block_id=0"<<std::endl;
+                return 0;
+            }
+            block_id -= 1; // turn the count into the 0-based id used to index the block tables
+            size_type res = block_id * t_block_size; // ones contributed by all earlier blocks
+            auto top_value = m_top_sel(block_id + 1);
+            size_type in_block_i = i; // becomes the offset of i relative to the block's top-level value
+            in_block_i -= top_value;
+            if (in_block_i == 0) {
+//std::cout<<"!!! in_block_i=0"<<std::endl;
+                return res;
+            }
+// TODO: can we return res+in_block_i if top_value-i==in_block_i ???
+
+            auto block_type = static_cast<hyb_sd_blocktype>(m_block_type[block_id]);
+//if (debug) {
+//    std::cout<<"!!! i="<<i<<" block_id="<<block_id<<" res="<<res<<std::endl;
+//    std::cout<<"!!! hyb_sd_blocktype = "<< (int)block_type <<std::endl;
+//}
+            if (block_type == hyb_sd_blocktype::FULL) { // answered before u is computed; 0 is passed in its place
+//if (debug) std::cout<<"!!! hyb_sd_blocktype::FULL"<<std::endl;
+                return res + hyb_sd_block_full<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_i, 0);
+            }
+
+            auto u = m_top_sel(block_id + 2) - top_value; // distance to the next top-level entry
+//            auto bt = determine_block_type(u);
+//            auto block_type = bt.first;
+//            size_type block_offset = m_block_start[block_id];
+
+            switch (block_type) { // add the rank inside the block, computed by the block's own encoding
+                case hyb_sd_blocktype::BV:
+//             std::cout << "!!!BV" << std::endl;
+                    res += hyb_sd_block_bv<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_i, u);
+                    break;
+                case hyb_sd_blocktype::EF:
+//if (debug){
+//    std::cout << "!!!single EF in_block_i="<<in_block_i << std::endl;
+//}
+                    res += hyb_sd_block_ef<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_i, u);
+                    break;
+                case hyb_sd_blocktype::FULL: // not reached: FULL blocks return in the shortcut above
+//             std::cout << "!!!FULL" << std::endl;
+                    res += hyb_sd_block_full<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_i, u);
+                    break;
+                case hyb_sd_blocktype::RL:
+//             std::cout << "!!!RL" << std::endl;
+                    res += hyb_sd_block_rl<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_i, u);
+                    break;
+            }
+//if (debug) {
+//    std::cout<<"!!!!  res="<<res<<std::endl;
+//}
+            return res;
+        }
+        //! Answers two rank queries at once: result[k] equals rank_1(ij[k]); work is shared when both positions fall into the block of ij[0].
+        std::array<size_type,2>
+        rank_1(std::array<size_type,2> ij) const
+        {
+            if (ij[0] > ij[1]) { // descending pair: answer each position with a separate query
+                return {rank_1(ij[0]),rank_1(ij[1])};
+            }
+            // now we know ij[0] <= ij[1]
+            if (ij[0] > m_size or m_num_ones == 0) { // both positions lie behind the vector, or there are no ones
+                return {m_num_ones, m_num_ones};
+            }
+            if (ij[1] > m_size or m_num_ones == 0) { // only ij[1] lies behind the vector
+                return {rank_1(ij[0]), m_num_ones};
+            }
+            auto block_id = m_top_rank(ij[0]); // same top-level lookup as in the single-position rank_1
+            if (block_id == 0) {
+                size_type first_element = m_top_sel(1);
+                if (ij[1] <= first_element) { // ij[1] does not pass the first top-level entry either
+                    return {{0,0}};
+                }
+                return {0, rank_1(ij[1])}; // TODO: can still be optimized
+            }
+            block_id -= 1; // turn the count into the 0-based id used to index the block tables
+            size_type r = block_id * t_block_size; // ones contributed by all earlier blocks
+            auto top_value = m_top_sel(block_id + 1);
+            size_type in_block_i = ij[0]; // both offsets are taken relative to the block of ij[0]
+            in_block_i -= top_value;
+            size_type in_block_j = ij[1];
+            in_block_j -= top_value;
+
+            if (in_block_i == 0) {
+                if (ij[0]==ij[1]) {
+                    return {r,r};
+                }
+                return {r, rank_1(ij[1])}; // TODO: can still be optimized
+            }
+
+            auto block_type = static_cast<hyb_sd_blocktype>(m_block_type[block_id]);
+            if (block_type == hyb_sd_blocktype::FULL and in_block_j < t_block_size) { // both offsets are answered by the FULL block; 0 is passed for u
+                return {r+hyb_sd_block_full<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_i, 0),
+                        r+hyb_sd_block_full<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_j, 0)
+                       };
+            }
+
+            auto u = m_top_sel(block_id + 2) - top_value; // distance to the next top-level entry
+//            auto bt = determine_block_type(u);
+//            auto block_type = bt.first;
+//            size_type block_offset = m_block_start[block_id];
+            std::array<size_type, 2> res {{r,r}};
+
+            switch (block_type) { // in_block_j >= u: only res[0] is resolved inside the block
+                case hyb_sd_blocktype::BV:
+//                    std::cout<<"double rank_1 for BV"<<std::endl;
+                    if (in_block_j >= u) {
+                        res[0] += hyb_sd_block_bv<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_i, u);
+                    } else {
+                        auto in_block_rank = hyb_sd_block_bv<t_block_size>::rank_1(m_bottom, m_block_start, block_id, {in_block_i, in_block_j}, u);
+                        res[0] += in_block_rank[0];
+                        res[1] += in_block_rank[1];
+                    }
+                    break;
+                case hyb_sd_blocktype::EF:
+//                    std::cout<<"double rank_1 for EF"<<std::endl;
+                    if (in_block_j >= u) {
+                        res[0] += hyb_sd_block_ef<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_i, u);
+                    } else {
+//                        std::cout<<"call double"<<std::endl;
+                        auto in_block_rank = hyb_sd_block_ef<t_block_size>::rank_1(m_bottom, m_block_start, block_id, {in_block_i, in_block_j}, u);
+                        res[0] += in_block_rank[0];
+                        res[1] += in_block_rank[1];
+                    }
+                    break;
+                case hyb_sd_blocktype::FULL: // uses two single-position calls rather than a pair call
+//                    std::cout<<"double rank_1 for FULL"<<std::endl;
+                    res[0] += hyb_sd_block_full<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_i, u);
+                    if (in_block_j < u) {
+                        res[1] += hyb_sd_block_full<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_j, u);
+                    }
+                    break;
+                case hyb_sd_blocktype::RL:
+//                    std::cout<<"double rank_1 for RL"<<std::endl;
+                    if (in_block_j >= u) {
+                        res[0] += hyb_sd_block_rl<t_block_size>::rank_1(m_bottom, m_block_start, block_id, in_block_i, u);
+                    } else {
+                        auto in_block_rank = hyb_sd_block_rl<t_block_size>::rank_1(m_bottom, m_block_start, block_id, {in_block_i, in_block_j}, u);
+                        res[0] += in_block_rank[0];
+                        res[1] += in_block_rank[1];
+                    }
+                    break;
+            }
+            if (in_block_j >= u) { // ij[1] was not resolved above: answer it with a single-position query
+                res[1] = rank_1(ij[1]);
+            }
+            return res;
+        }
+
+        //! Reads up to len bits starting at idx; bit idx+k lands at bit k of the result, bits at or beyond size() stay 0.
+        uint64_t get_int(size_type idx, const uint8_t len = 64) const
+        {
+            uint64_t word = 0ULL;
+            for (size_t k = 0; k < len and idx + k < size(); ++k) {
+                word |= static_cast<uint64_t>((*this)[idx + k]) << k;
+            }
+            return word;
+        }
+
+        //! Returns the length of the bit vector, i.e. the stored m_size.
+        size_type size() const
+        {
+            return m_size;
+        }
+
+        //! Writes all members to out, records their sizes in the structure tree under v, and returns the number of bytes written.
+        size_type serialize(std::ostream& out, structure_tree_node* v = nullptr, std::string name = "") const
+        {
+            structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
+            size_type written_bytes = 0;
+            written_bytes += write_member(m_size, out, child, "size");
+            written_bytes += write_member(m_num_ones, out, child, "num_ones");
+            written_bytes += m_top.serialize(out, child, "top");
+            written_bytes += m_top_sel.serialize(out, child, "top_sel");
+            written_bytes += m_top_rank.serialize(out, child, "top_rank");
+            //written_bytes += m_bottom.serialize(out, nullptr, "bottom");
+            auto bottom_bytes = m_bottom.serialize(out, nullptr, "bottom"); // no tree node passed: the "bottom" node is assembled by hand below
+            {
+                structure_tree_node* bottom_child = structure_tree::add_child(child, "bottom", util::class_name(m_bottom));
+                std::array<size_type,4> written_bits = {{0,0,0,0}}; // bits occupied per block type, indexed by the stored type value
+                for (size_t i=1; i<m_block_start.size(); ++i) {
+                    written_bits[m_block_type[i-1]] += m_block_start[i]-m_block_start[i-1];
+                }
+                std::vector<std::string> names = {"EF","BV","FULL","RL"}; // NOTE(review): order must match the numeric values of hyb_sd_blocktype - confirm
+                for (size_t i=0; i<written_bits.size(); ++i) {
+                    structure_tree_node* block_child = structure_tree::add_child(bottom_child, names[i], util::class_name(m_bottom));
+                    structure_tree::add_size(block_child, (written_bits[i]+7)/8); // bits rounded up to whole bytes
+                }
+                structure_tree::add_size(bottom_child, bottom_bytes);
+            }
+            written_bytes += bottom_bytes;
+            written_bytes += m_block_start.serialize(out, child, "block_start");
+            written_bytes += m_block_type.serialize(out, child, "block_type");
+            structure_tree::add_size(child, written_bytes);
+            return written_bytes;
+        }
+
+        //! Reads the members from in, in the same order in which serialize() writes them.
+        void load(std::istream& in)
+        {
+            read_member(m_size, in);
+            read_member(m_num_ones, in);
+            m_top.load(in);
+            m_top_sel.load(in);
+            m_top_sel.set_vector(&m_top); // bind the loaded support structure to this object's m_top
+            m_top_rank.load(in);
+            m_top_rank.set_vector(&m_top); // same for the rank support
+            m_bottom.load(in);
+            m_block_start.load(in);
+            m_block_type.load(in);
+        }
+        //! Returns an iterator constructed for position 0 of this vector.
+        iterator begin() const
+        {
+            return iterator(this, 0);
+        }
+        //! Returns an iterator constructed for position size(), i.e. one past the last bit.
+        iterator end() const
+        {
+            return iterator(this, size());
+        }
+};
+
+//! Select support for hyb_sd_vector: a thin view that forwards every query to the supported vector.
+template <uint8_t t_b = 1,
+          class hyb_bv_type = hyb_sd_vector<>>
+class select_support_hyb_sd
+{
+    public:
+        using size_type = typename hyb_bv_type::size_type;
+        using bit_vector_type = hyb_bv_type;
+        enum { bit_pat = t_b }; // only recorded; select() always calls select_1
+        enum { bit_pat_len = static_cast<uint8_t>(1) };
+        static constexpr uint16_t block_size = hyb_bv_type::block_size;
+
+    private:
+        const hyb_bv_type* m_v; // supported vector (never deleted by this class)
+
+    public:
+        explicit select_support_hyb_sd(const hyb_bv_type* v = nullptr)
+            : m_v(v)
+        {
+        }
+        //! Forwards to select_1(i) of the supported vector.
+        size_type select(size_type i) const
+        {
+            return m_v->select_1(i);
+        }
+        //! Same as select(i).
+        size_type operator()(size_type i) const
+        {
+            return m_v->select_1(i);
+        }
+        //! Size of the supported vector.
+        size_type size() const
+        {
+            return m_v->size();
+        }
+        //! Binds the support to another vector (or to none).
+        void set_vector(const hyb_bv_type* v = nullptr)
+        {
+            m_v = v;
+        }
+        //! Copies the binding of ss; the vector itself is not copied.
+        select_support_hyb_sd& operator=(const select_support_hyb_sd& ss)
+        {
+            // Copying the pointer is all there is to do, which also makes
+            // self-assignment a no-op without an explicit check.
+            m_v = ss.m_v;
+            return *this;
+        }
+        //! Reads nothing from the stream; only binds the support to v.
+        void load(std::istream&, const hyb_bv_type* v = nullptr)
+        {
+            m_v = v;
+        }
+        //! Delegates to serialize_empty_object and returns its result.
+        size_type serialize(std::ostream& out, structure_tree_node* v = nullptr, std::string name = "") const
+        {
+            return serialize_empty_object(out, v, name, this);
+        }
+};
+
+//! Rank support for hyb_sd_vector: a thin view that forwards every query to the supported vector.
+template <uint8_t t_b = 1,
+          class hyb_bv_type = hyb_sd_vector<>>
+class rank_support_hyb_sd
+{
+    public:
+        using size_type = typename hyb_bv_type::size_type;
+        using bit_vector_type = hyb_bv_type;
+        enum { bit_pat = t_b }; // only recorded; rank() always calls rank_1
+        enum { bit_pat_len = static_cast<uint8_t>(1) };
+        static constexpr uint16_t block_size = hyb_bv_type::block_size;
+
+    private:
+        const hyb_bv_type* m_v; // supported vector (never deleted by this class)
+
+    public:
+        explicit rank_support_hyb_sd(const hyb_bv_type* v = nullptr)
+            : m_v(v)
+        {
+        }
+        //! Forwards to rank_1(i) of the supported vector; the result is returned as t_pos.
+        template<typename t_pos>
+        t_pos rank(t_pos i) const
+        {
+            return m_v->rank_1(i);
+        }
+        //! Same as rank(i).
+        template<typename t_pos>
+        t_pos operator()(t_pos i) const
+        {
+            return m_v->rank_1(i);
+        }
+        //! Size of the supported vector.
+        size_type size() const
+        {
+            return m_v->size();
+        }
+        //! Binds the support to another vector (or to none).
+        void set_vector(const hyb_bv_type* v = nullptr)
+        {
+            m_v = v;
+        }
+        //! Copies the binding of ss; the vector itself is not copied.
+        rank_support_hyb_sd& operator=(const rank_support_hyb_sd& ss)
+        {
+            // Copying the pointer is all there is to do, which also makes
+            // self-assignment a no-op without an explicit check.
+            m_v = ss.m_v;
+            return *this;
+        }
+        //! Reads nothing from the stream; only binds the support to v.
+        void load(std::istream&, const hyb_bv_type* v = nullptr)
+        {
+            m_v = v;
+        }
+        //! Delegates to serialize_empty_object and returns its result.
+        size_type serialize(std::ostream& out, structure_tree_node* v = nullptr, std::string name = "") const
+        {
+            return serialize_empty_object(out, v, name, this);
+        }
+};
+
+} // end namespace
+#endif
diff --git a/include/sdsl/structure_tree.hpp b/include/sdsl/structure_tree.hpp
index ecf3dc303..74c78bf9d 100644
--- a/include/sdsl/structure_tree.hpp
+++ b/include/sdsl/structure_tree.hpp
@@ -121,6 +121,7 @@ inline std::string create_html_header(const char* file_name)
 			   << "   <head>\n"
 			   << "    <meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\">\n"
 			   << "    <title>" << file_name << "</title>\n"
+			   << "    <script src=\"file:///Users/sgog/xxs/sdsl-lite/external/d3/d3.min.js\"></script>"
 			   << "    <script src=\"http://d3js.org/d3.v2.js\"></script>\n"
 			   << "    <style type=\"text/css\">\n"
 			   << "      path { stroke: #000; stroke-width: 0.8; cursor: pointer; }\n"
diff --git a/include/sdsl/suffix_array_algorithm.hpp b/include/sdsl/suffix_array_algorithm.hpp
index 2e7ae93c0..f746821b5 100644
--- a/include/sdsl/suffix_array_algorithm.hpp
+++ b/include/sdsl/suffix_array_algorithm.hpp
@@ -9,6 +9,7 @@
 #define INCLUDED_SDSL_SUFFIX_ARRAY_ALGORITHM
 
 #include <iterator>
+#include <array>
 #include "suffix_array_helper.hpp"
 
 namespace sdsl {
@@ -157,24 +158,28 @@ typename t_csa::size_type& r_res,
 SDSL_UNUSED typename std::enable_if<std::is_same<csa_tag, typename t_csa::index_category>::value,
 									csa_tag>::type x = csa_tag())
 {
-	assert(l <= r);
-	assert(r < csa.size());
-	typename t_csa::size_type cc = csa.char2comp[c];
-	if (cc == 0 and c > 0) {
-		l_res = 1;
-		r_res = 0;
-	} else {
-		typename t_csa::size_type c_begin = csa.C[cc];
-		if (l == 0 and r + 1 == csa.size()) {
-			l_res = c_begin;
-			r_res = csa.C[cc + 1] - 1;
-		} else {
-			l_res = c_begin + csa.bwt.rank(l, c);		  // count c in bwt[0..l-1]
-			r_res = c_begin + csa.bwt.rank(r + 1, c) - 1; // count c in bwt[0..r]
-		}
-	}
-	assert(r_res + 1 - l_res >= 0);
-	return r_res + 1 - l_res;
+    assert(l <= r);
+    assert(r < csa.size());
+    auto cc = csa.char2comp[c];
+    if (cc == 0 and c > 0) {
+        l_res = 1;
+        r_res = 0;
+    } else {
+        auto c_begin = csa.C[cc];
+        if (l == 0 and r+1 == csa.size()) {
+            l_res = c_begin;
+            r_res = csa.C[cc+1] - 1;
+        } else {
+//            TODO: use double rank once implemented in all CSAs
+//            auto lr = csa.bwt.rank(std::array<typename t_csa::size_type,2> {l,r+1},c);
+//            l_res = c_begin + lr[0];       // count c in bwt[0..l-1]
+//            r_res = c_begin + lr[1] - 1;   // count c in bwt[0..r]
+            l_res = c_begin + csa.bwt.rank(l, c); // count c in bwt[0..l-1]
+            r_res = c_begin + csa.bwt.rank(r+1, c) - 1; // count c in bwt[0..r]
+        }
+    }
+    assert(r_res+1-l_res >= 0);
+    return r_res+1-l_res;
 }
 
 
@@ -224,6 +229,92 @@ SDSL_UNUSED typename std::enable_if<std::is_same<csa_tag, typename t_csa::index_
 	return r + 1 - l;
 }
 
+
+//! Backward search for the pattern [begin,end) on the interval [l..r] of a csa_sada with a succinct_multibyte_alphabet; stores the new interval in [l_res..r_res] and returns its size.
+template<typename t_ev,
+         uint32_t t_dens,
+         uint32_t t_inv_dens,
+         typename t_sa_sample_strat,
+         typename t_isa_sample_strat,
+         uint8_t t_q,
+         typename t_bv,
+         typename t_rs,
+         typename t_ss,
+         typename t_pat_iter
+         >
+uint64_t backward_search(
+    const csa_sada<t_ev, t_dens, t_inv_dens, t_sa_sample_strat, t_isa_sample_strat,
+    succinct_multibyte_alphabet<t_q, t_bv, t_rs, t_ss>>& csa,
+    uint64_t l,
+    uint64_t r,
+    t_pat_iter begin,
+    t_pat_iter end,
+    uint64_t& l_res,
+    uint64_t& r_res
+)
+{
+    assert(l <= r); assert(r < csa.size());
+    typedef typename std::remove_reference<decltype(csa)>::type t_csa;
+    using multi_comp_char_type = typename t_csa::alphabet_type::multi_comp_char_type;
+    constexpr auto q = t_csa::alphabet_type::q;
+
+    auto m = std::distance(begin, end);
+    if (static_cast<uint64_t>(m) < q) { // fewer than q pattern characters: one single-character search step per character
+        t_pat_iter it = end;
+        while (begin < it and r+1-l > 0) { // stops early once the interval is empty
+            --it;
+            backward_search(csa, l, r, (typename t_csa::char_type)*it, l, r);
+        }
+        l_res = l;
+        r_res = r;
+        return r+1-l;
+    }
+
+    multi_comp_char_type x {0}; // combined symbol that cyclic_insert_hi updates with every pattern character
+    t_pat_iter it = end; // the pattern is consumed from its last character towards the first
+    size_t processed = 0;
+//    std::cout<<"simga="<<csa.sigma<<std::endl;
+//    std::cout<<"sigma_pow_q_1="<<sigma_pow_q_1<<std::endl;
+
+    while (begin < it and processed < q-1) { // feed the last q-1 characters into x without touching [l..r]
+        --it;
+        ++processed;
+        if (!cyclic_insert_hi(x, *it, csa.alphabet)) { // insertion reported failure: return the empty interval [1..0]
+            l_res = 1;
+            r_res = 0;
+            return 0;
+        }
+    }
+
+    while (begin < it and r+1-l > 0) { // every further character updates x and then narrows [l..r]
+        --it;
+        if (!cyclic_insert_hi(x, *it, csa.alphabet)) { // insertion reported failure: return the empty interval [1..0]
+            l_res = 1;
+            r_res = 0;
+            return 0;
+        }
+//        std::cout<<"\nx="<<static_cast<uint64_t>(x)<<std::endl;
+        auto c_begin = csa.C[x];
+        if (l == 0 and r+1 == csa.size()) { // full interval: the bounds are read directly from C
+            l = c_begin;
+            r = csa.C[x+1] - 1;
+//            std::cout<<"initial step ["<<l<<","<< r<<"] "<<csa.C[x+1]<<" "<<static_cast<uint64_t>(x+1)<<std::endl;
+        } else {
+            auto lr = csa.double_rank_comp_bwt(l, r+1, x); // one call replaces the two rank_comp_bwt calls kept below
+            l = c_begin + std::get<0>(lr);
+            r = c_begin + std::get<1>(lr)-1;
+//            l = c_begin + csa.rank_comp_bwt(l, x); // count c in bwt[0..l-1]
+//            r = c_begin + csa.rank_comp_bwt(r+1, x) - 1; // count c in bwt[0..r]
+        }
+//        std::cout<<"bw_search debug ["<<l<<","<<r<<"] x="<<static_cast<uint64_t>(x)<<std::endl;
+    }
+    l_res = l;
+    r_res = r;
+    assert(r+1-l >= 0); // NOTE(review): always true, l and r are uint64_t
+    return r+1-l;
+}
+
+
 //! Bidirectional search for a character c on an interval \f$[l_fwd..r_fwd]\f$ of the suffix array.
 /*!
  * \param csa_fwd   The CSA object of the forward text in which the backward_search should be done.
@@ -471,9 +562,11 @@ typename t_csx::size_type count(const t_csx& csx, t_pat_iter begin, t_pat_iter e
  * \par Time complexity
  *        \f$ \Order{ t_{backward\_search} } \f$
  */
-
-template <class t_csx>
-typename t_csx::size_type count(const t_csx& csx, const typename t_csx::string_type& pat)
+template<class t_csx>
+typename t_csx::size_type count(
+    const t_csx& csx,
+    const typename t_csx::string_type& pat
+)
 {
 	typename t_csx::index_category tag;
 	return count(csx, pat.begin(), pat.end(), tag);
@@ -524,7 +617,7 @@ t_rac locate(const t_csa& csa,
 			 typename std::enable_if<std::is_same<csa_tag, typename t_csa::index_category>::value,
 									 csa_tag>::type x = csa_tag())
 {
-	typename t_csa::size_type occ_begin, occ_end, occs;
+    typename t_csa::size_type occ_begin = 0, occ_end = 0, occs = 0;
 	occs = backward_search(csa, 0, csa.size() - 1, begin, end, occ_begin, occ_end);
 	t_rac occ(occs);
 	for (typename t_csa::size_type i = 0; i < occs; ++i) {
diff --git a/include/sdsl/suffix_array_helper.hpp b/include/sdsl/suffix_array_helper.hpp
index 9fc069521..d36e44b81 100644
--- a/include/sdsl/suffix_array_helper.hpp
+++ b/include/sdsl/suffix_array_helper.hpp
@@ -11,6 +11,7 @@
 #include <stdint.h>
 #include <cstdlib>
 #include <cassert>
+#include <array>
 #include "iterators.hpp"
 
 namespace sdsl {
@@ -225,22 +226,32 @@ class bwt_of_csa_psi {
 
 	//! Calculates how many symbols c are in the prefix [0..i-1]
 	/*!
-         *  \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
-         *  \param c The symbol to count the occurrences in the prefix.
-         *    \returns The number of occurrences of symbol c in the prefix [0..i-1].
-         *  \par Time complexity
-         *        \f$ \Order{\log n t_{\Psi}} \f$
-         */
-	size_type rank(size_type i, const char_type c) const { return m_csa.rank_bwt(i, c); }
+     *  \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
+     *  \param c The symbol to count the occurrences in the prefix.
+     *    \returns The number of occurrences of symbol c in the prefix [0..i-1].
+     *  \par Time complexity
+     *        \f$ \Order{\log n t_{\Psi}} \f$
+     */
+    template<typename t_char> // the symbol type is a template parameter; c is passed on unchanged
+    size_type rank(size_type i, const t_char c)const
+    {
+        return m_csa.rank_bwt(i, c);
+    }
+    //! Pair variant of rank: forwards the position pair ij to m_csa.rank_bwt(ij, c) and returns its two-element result.
+    template<typename t_char>
+    std::array<size_type,2> rank(std::array<size_type,2> ij, const t_char c)const
+    {
+        return m_csa.rank_bwt(ij, c);
+    }
 
 	//! Calculates the position of the i-th c.
 	/*!
-         *  \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$.
-         *  \param c Symbol c.
-         *    \returns The position of the i-th c or size() if c does occur less then i times.
-         *  \par Time complexity
-         *        \f$ \Order{t_{\Psi}} \f$
-         */
+     *  \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$.
+     *  \param c Symbol c.
+     *    \returns The position of the i-th c or size() if c occurs fewer than i times.
+     *  \par Time complexity
+     *        \f$ \Order{t_{\Psi}} \f$
+     */
 	size_type select(size_type i, const char_type c) const { return m_csa.select_bwt(i, c); }
 
 	//! Returns the size of the \f$\Psi\f$ function.
@@ -365,22 +376,28 @@ class bwt_of_csa_wt {
 
 	//! Calculates how many symbols c are in the prefix [0..i-1].
 	/*!
-         *  \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
-         *  \param c The symbol to count the occurrences in the prefix.
-         *    \returns The number of occurrences of symbol c in the prefix [0..i-1].
-         *  \par Time complexity
-         *        \f$ \Order{\log |\Sigma|} \f$
-         */
+     *  \param i The exclusive index of the prefix range [0..i-1], so \f$i\in [0..size()]\f$.
+     *  \param c The symbol to count the occurrences in the prefix.
+     *    \returns The number of occurrences of symbol c in the prefix [0..i-1].
+     *  \par Time complexity
+     *        \f$ \Order{\log |\Sigma|} \f$
+     */
 	size_type rank(size_type i, const char_type c) const { return m_csa.rank_bwt(i, c); }
 
-	//! Calculates the position of the i-th c.
-	/*!
-         *  \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$.
-         *  \param c Symbol c.
-         *    \returns The position of the i-th c or size() if c does occur less then i times.
-         *  \par Time complexity
-         *        \f$ \Order{t_{\Psi}} \f$
-         */
+    template<typename t_char> // pair variant of rank: forwards the position pair ij to m_csa.rank_bwt
+    std::array<size_type,2> rank(std::array<size_type,2> ij, const t_char c)const
+    {
+        return m_csa.rank_bwt(ij, c);
+    }
+
+    //! Calculates the position of the i-th c.
+    /*!
+     *  \param i The i-th occurrence. \f$i\in [1..rank(size(),c)]\f$.
+     *  \param c Symbol c.
+     *    \returns The position of the i-th c or size() if c occurs fewer than i times.
+     *  \par Time complexity
+     *        \f$ \Order{t_{\Psi}} \f$
+     */
 	size_type select(size_type i, const char_type c) const { return m_csa.select(i, c); }
 
 
diff --git a/include/sdsl/suffix_arrays.hpp b/include/sdsl/suffix_arrays.hpp
index 6dce11e69..f229802f0 100644
--- a/include/sdsl/suffix_arrays.hpp
+++ b/include/sdsl/suffix_arrays.hpp
@@ -15,6 +15,7 @@
 #include "csa_bitcompressed.hpp"
 #include "csa_wt.hpp"
 #include "csa_sada.hpp"
+#include "csa_sada2.hpp"
 #include "wavelet_trees.hpp"
 #include "construct.hpp"
 #include "suffix_array_algorithm.hpp"
diff --git a/include/sdsl/uint128_t.hpp b/include/sdsl/uint128_t.hpp
index 1c64b8f5d..47d84c065 100644
--- a/include/sdsl/uint128_t.hpp
+++ b/include/sdsl/uint128_t.hpp
@@ -13,7 +13,7 @@
 
 namespace sdsl {
 
-#if defined(__GNUC__)
+#if 1
 
 typedef unsigned int uint128_t __attribute__((mode(TI)));
 
diff --git a/include/sdsl/wm_int.hpp b/include/sdsl/wm_int.hpp
index dab350814..02a972124 100644
--- a/include/sdsl/wm_int.hpp
+++ b/include/sdsl/wm_int.hpp
@@ -680,7 +680,7 @@ class wm_int {
 			r		 = {left_sp, left_sp + left_size - 1};
 			res[i++] = {right_sp, right_sp + right_size - 1};
 		}
-		return {ranges, std::move(res)};
+		return {{ranges, std::move(res)}};
 	}
 
 	//! Returns for a range its left and right child ranges
diff --git a/include/sdsl/wt_int.hpp b/include/sdsl/wt_int.hpp
index 685ef883a..eb0a531c7 100644
--- a/include/sdsl/wt_int.hpp
+++ b/include/sdsl/wt_int.hpp
@@ -888,7 +888,7 @@ class wt_int {
 			r		 = {left_sp, left_sp + left_size - 1};
 			res[i++] = {right_sp, right_sp + right_size - 1};
 		}
-		return {ranges, std::move(res)};
+		return {{ranges, std::move(res)}};
 	}
 
 	//! Returns for a range its left and right child ranges
diff --git a/lib/hyb_sd_vector.cpp b/lib/hyb_sd_vector.cpp
new file mode 100644
index 000000000..1a229a7a3
--- /dev/null
+++ b/lib/hyb_sd_vector.cpp
@@ -0,0 +1,4 @@
+#include "sdsl/hyb_sd_vector.hpp"
+namespace sdsl {
+  size_t g_saved_bits = 0; // definition of the global counter, initially 0; NOTE(review): presumably declared extern in hyb_sd_vector.hpp - confirm
+}
diff --git a/test/bit_vector_test.typedef b/test/bit_vector_test.typedef
index bf1fb2710..ec0e5a693 100644
--- a/test/bit_vector_test.typedef
+++ b/test/bit_vector_test.typedef
@@ -1,4 +1,5 @@
 bit_vector
+hyb_sd_vector<>
 bit_vector_il<>
 rrr_vector<>
 sd_vector<>
diff --git a/test/csa_byte_test.cpp.cmake b/test/csa_byte_test.cpp.cmake
index 4336b64d7..1c3aa41e5 100644
--- a/test/csa_byte_test.cpp.cmake
+++ b/test/csa_byte_test.cpp.cmake
@@ -234,7 +234,6 @@ TYPED_TEST(csa_byte_test, psi_lf_access)
     }
 }
 
-
 //! Test access after swap
 TYPED_TEST(csa_byte_test, swap)
 {
diff --git a/test/csa_byte_test.typedef b/test/csa_byte_test.typedef
index fb8291da7..3a0306616 100644
--- a/test/csa_byte_test.typedef
+++ b/test/csa_byte_test.typedef
@@ -1,3 +1,4 @@
 csa_wt<>
 csa_sada<>
+csa_sada2<>
 csa_bitcompressed<>
diff --git a/test/csa_int_test.typedef b/test/csa_int_test.typedef
index a0d8cffc2..49635dea7 100644
--- a/test/csa_int_test.typedef
+++ b/test/csa_int_test.typedef
@@ -1,3 +1,4 @@
 csa_wt<wt_int<>, 32, 32, sa_order_sa_sampling<>, isa_sampling<>, int_alphabet<>>
 csa_sada<enc_vector<>, 32, 32, sa_order_sa_sampling<>, isa_sampling<>, int_alphabet<>>
+csa_sada2<hyb_sd_vector<>, 32, 64, sa_order_sa_sampling<>, isa_sampling<>, int_alphabet<>>
 csa_bitcompressed<int_alphabet<>>
diff --git a/test/rank_support_test.typedef b/test/rank_support_test.typedef
index 5f75d4c6c..1a797aead 100644
--- a/test/rank_support_test.typedef
+++ b/test/rank_support_test.typedef
@@ -1,4 +1,5 @@
 rank_support_il<1, 512>
+hyb_sd_vector<>::rank_1_type
 rank_support_rrr<>
 rank_support_v<>
 rank_support_v5<>
diff --git a/test/select_support_test.typedef b/test/select_support_test.typedef
index 71c8a433a..19a50a792 100644
--- a/test/select_support_test.typedef
+++ b/test/select_support_test.typedef
@@ -1,4 +1,5 @@
 select_support_mcl<>
+select_support_hyb_sd<1>
 select_support_rrr<>
 select_support_sd<1>
 select_support_sd<0>
diff --git a/tutorial/Makefile b/tutorial/Makefile
index fd926989a..99786e9db 100644
--- a/tutorial/Makefile
+++ b/tutorial/Makefile
@@ -1,6 +1,6 @@
 include ../Make.helper
 CXX_FLAGS=$(MY_CXX_FLAGS) $(MY_CXX_OPT_FLAGS) -I$(INC_DIR) -L$(LIB_DIR) 
-CCLIB=-lsdsl -ldivsufsort -ldivsufsort64 
+CCLIB=-ldivsufsort -ldivsufsort64 
 SOURCES=$(wildcard *.cpp)
 EXECS=$(SOURCES:.cpp=.x)