Neater hashing interface #4524

Open

wants to merge 22 commits into base: main

Commits (22)
ea0be3b
hashlib: redo interface for flexibility
widlarizer Oct 1, 2024
fdcca7c
driver: add --hash-seed
widlarizer Oct 1, 2024
0c1bc86
abc: sort stats
widlarizer Oct 4, 2024
a2880c9
hashlib: fix pyosys
widlarizer Oct 9, 2024
3e6e5be
hashlib: only include in one place
widlarizer Oct 15, 2024
4923f12
hashlib: use hash_t across the board
widlarizer Oct 18, 2024
7cbdc92
hashlib: hash_t can be set to 64-bit
widlarizer Oct 18, 2024
0e6f631
hashlib: fudge always
widlarizer Oct 18, 2024
8f160c5
hashlib: don't xorshift in between upper and lower word
widlarizer Oct 30, 2024
f79e634
hashlib: allow forcing Hasher state, use it for IdString trivial hashing
widlarizer Oct 30, 2024
3da1d26
hashlib: prevent naive hashing of IdString when hashing SigBit
widlarizer Oct 30, 2024
aacb9ca
hash: solo hashing interface, override for SigBit
widlarizer Nov 4, 2024
d4b7bf3
hashlib: restore hash_obj_ops for pointers to indexed types
widlarizer Nov 4, 2024
f83d6af
hashlib: remove is_new from HasherDJB32, implement hash_top for IdString
widlarizer Nov 6, 2024
950478e
hashlib: run_hash uses hash_top_ops, not hash_ops
widlarizer Nov 6, 2024
5dc3cb1
docs: document the ideas behind the hashing interface
widlarizer Nov 6, 2024
3406b20
Docs: Formatting and fixes
KrystalDelusion Nov 6, 2024
9fcbcf3
docs: formatting and fixes
widlarizer Nov 6, 2024
795c0fb
docs: move hashing-based container details into internal docs from gu…
widlarizer Nov 11, 2024
7bffd7c
hashlib: add deprecated mkhash function to prevent plugin breakage
widlarizer Nov 11, 2024
f2f7d3e
hashlib: acc -> eat
widlarizer Nov 11, 2024
5584e74
hashlib: legacy mkhash_add -> djb2_add
widlarizer Nov 11, 2024
26 changes: 13 additions & 13 deletions backends/cxxrtl/cxxrtl_backend.cc
@@ -47,7 +47,7 @@ struct Scheduler {
struct Vertex {
T *data;
Vertex *prev, *next;
pool<Vertex*, hash_ptr_ops> preds, succs;
pool<Vertex*> preds, succs;

Vertex() : data(NULL), prev(this), next(this) {}
Vertex(T *data) : data(data), prev(NULL), next(NULL) {}
@@ -300,10 +300,10 @@ struct FlowGraph {
};

std::vector<Node*> nodes;
dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_comb_defs, wire_sync_defs, wire_uses;
dict<Node*, pool<const RTLIL::Wire*>, hash_ptr_ops> node_comb_defs, node_sync_defs, node_uses;
dict<const RTLIL::Wire*, pool<Node*>> wire_comb_defs, wire_sync_defs, wire_uses;
dict<Node*, pool<const RTLIL::Wire*>> node_comb_defs, node_sync_defs, node_uses;
dict<const RTLIL::Wire*, bool> wire_def_inlinable;
dict<const RTLIL::Wire*, dict<Node*, bool, hash_ptr_ops>> wire_use_inlinable;
dict<const RTLIL::Wire*, dict<Node*, bool>> wire_use_inlinable;
dict<RTLIL::SigBit, bool> bit_has_state;

~FlowGraph()
@@ -365,7 +365,7 @@ struct FlowGraph {
return false;
}

bool is_inlinable(const RTLIL::Wire *wire, const pool<Node*, hash_ptr_ops> &nodes) const
bool is_inlinable(const RTLIL::Wire *wire, const pool<Node*> &nodes) const
{
// Can the wire be inlined, knowing that the given nodes are reachable?
if (nodes.size() != 1)
@@ -3080,7 +3080,7 @@ struct CxxrtlWorker {
// without feedback arcs can generally be evaluated in a single pass, i.e. it always requires only
// a single delta cycle.
Scheduler<FlowGraph::Node> scheduler;
dict<FlowGraph::Node*, Scheduler<FlowGraph::Node>::Vertex*, hash_ptr_ops> node_vertex_map;
dict<FlowGraph::Node*, Scheduler<FlowGraph::Node>::Vertex*> node_vertex_map;
for (auto node : flow.nodes)
node_vertex_map[node] = scheduler.add(node);
for (auto node_comb_def : flow.node_comb_defs) {
@@ -3095,7 +3095,7 @@ struct CxxrtlWorker {

// Find out whether the order includes any feedback arcs.
std::vector<FlowGraph::Node*> node_order;
pool<FlowGraph::Node*, hash_ptr_ops> evaluated_nodes;
pool<FlowGraph::Node*> evaluated_nodes;
pool<const RTLIL::Wire*> feedback_wires;
for (auto vertex : scheduler.schedule()) {
auto node = vertex->data;
@@ -3139,7 +3139,7 @@ struct CxxrtlWorker {
}

// Discover nodes reachable from primary outputs (i.e. members) and collect reachable wire users.
pool<FlowGraph::Node*, hash_ptr_ops> worklist;
pool<FlowGraph::Node*> worklist;
for (auto node : flow.nodes) {
if (node->type == FlowGraph::Node::Type::CELL_EVAL && !is_internal_cell(node->cell->type))
worklist.insert(node); // node evaluates a submodule
worklist.insert(node); // node drives public wires
}
}
dict<const RTLIL::Wire*, pool<FlowGraph::Node*, hash_ptr_ops>> live_wires;
pool<FlowGraph::Node*, hash_ptr_ops> live_nodes;
dict<const RTLIL::Wire*, pool<FlowGraph::Node*>> live_wires;
pool<FlowGraph::Node*> live_nodes;
while (!worklist.empty()) {
auto node = worklist.pop();
live_nodes.insert(node);
@@ -3290,15 +3290,15 @@ struct CxxrtlWorker {

// Discover nodes reachable from primary outputs (i.e. outlines) up until primary inputs (i.e. members)
// and collect reachable wire users.
pool<FlowGraph::Node*, hash_ptr_ops> worklist;
pool<FlowGraph::Node*> worklist;
for (auto node : flow.nodes) {
if (flow.node_comb_defs.count(node))
for (auto wire : flow.node_comb_defs[node])
if (debug_wire_types[wire].is_outline())
worklist.insert(node); // node drives outline
}
dict<const RTLIL::Wire*, pool<FlowGraph::Node*, hash_ptr_ops>> debug_live_wires;
pool<FlowGraph::Node*, hash_ptr_ops> debug_live_nodes;
dict<const RTLIL::Wire*, pool<FlowGraph::Node*>> debug_live_wires;
pool<FlowGraph::Node*> debug_live_nodes;
while (!worklist.empty()) {
auto node = worklist.pop();
debug_live_nodes.insert(node);
153 changes: 153 additions & 0 deletions docs/source/yosys_internals/hashing.rst
@@ -0,0 +1,153 @@
Hashing and associative data structures in Yosys
------------------------------------------------
Member: Leaving a note that I read this file

Container classes based on hashing
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Yosys uses ``dict<K, T>`` and ``pool<T>`` as main container classes.
``dict<K, T>`` is essentially a replacement for ``std::unordered_map<K, T>``
and ``pool<T>`` is a replacement for ``std::unordered_set<T>``.
The main characteristics are:

* ``dict<K, T>`` and ``pool<T>`` are about 2x faster than the std containers
  (though this claim hasn't been verified for over 10 years)

* references to elements in a ``dict<K, T>`` or ``pool<T>`` are invalidated by
  insert and remove operations (similar to ``std::vector<T>`` on ``push_back()``).

* some iterators are invalidated by ``erase()``. Specifically, iterators that
  have not yet passed the erased element are invalidated. (``erase()`` itself
  returns a valid iterator to the next element.)

* no iterators are invalidated by ``insert()``. Elements are inserted at
  ``begin()``, i.e. only a new iterator that starts at ``begin()`` will see the
  inserted elements.

* the method ``.count(key, iterator)`` is like ``.count(key)`` but only
  considers elements that can be reached via the iterator.

* iterators can be compared. ``it1 < it2`` means that the position of ``it2``
  can be reached via ``it1`` but not vice versa.

* the method ``.sort()`` can be used to sort the elements in the container.
  The container stays sorted until elements are added or removed.

* ``dict<K, T>`` and ``pool<T>`` will have the same order of iteration across
  all compilers, standard libraries and architectures.
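
A minimal usage sketch illustrating some of the characteristics above (the
container contents and the ``container_demo`` function are made up for the
example):

.. code-block:: cpp

   #include "kernel/yosys.h"
   USING_YOSYS_NAMESPACE

   void container_demo()
   {
       dict<IdString, int> fanout;
       fanout[ID::A] = 3;   // insert or update, like std::unordered_map
       fanout[ID::B] = 1;

       pool<IdString> visited;
       visited.insert(ID::A);
       log("A visited: %d\n", visited.count(ID::A)); // prints 1

       // Deterministic iteration: .sort() orders the elements, and the order
       // is kept until the container is modified again.
       fanout.sort();
       for (auto &it : fanout)
           log("%s -> %d\n", log_id(it.first), it.second);
   }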

In addition to ``dict<K, T>`` and ``pool<T>`` there is also an ``idict<K>`` that
creates a bijective map from ``K`` to the integers. For example:

::

   idict<string, 42> si;
   log("%d\n", si("hello"));    // will print 42
   log("%d\n", si("world"));    // will print 43
   log("%d\n", si.at("world")); // will print 43
   log("%d\n", si.at("dummy")); // will throw exception
   log("%s\n", si[42].c_str()); // will print hello
   log("%s\n", si[43].c_str()); // will print world
   log("%s\n", si[44].c_str()); // will throw exception

It is not possible to remove elements from an idict.

Finally ``mfp<K>`` implements a merge-find set data structure (aka. disjoint-set
or union-find) over the type ``K`` ("mfp" = merge-find-promote).
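
For instance, the usual union-find operations look roughly like this (a sketch
only; see ``kernel/hashlib.h`` for the exact ``mfp`` API):

.. code-block:: cpp

   mfp<std::string> groups;
   groups.merge("a", "b");   // "a" and "b" now belong to the same set
   groups.merge("b", "c");
   // "a" and "c" now share a representative element
   log("%d\n", groups.find("a") == groups.find("c")); // prints 1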

The hash function
~~~~~~~~~~~~~~~~~

The hash function generally used in Yosys is the XOR version of DJB2:

::

   state = ((state << 5) + state) ^ value

This is an old-school hash designed to hash ASCII characters. Yosys doesn't hash
a lot of ASCII text, but it still happens to be a local optimum due to factors
described later.
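
Written out as plain C++ (for illustration only, this is not the actual hashlib
code), folding a buffer of bytes with this scheme looks like:

.. code-block:: cpp

   #include <cstddef>
   #include <cstdint>

   // XOR flavour of DJB2: multiply the state by 33 (as a shift and add),
   // then fold in the next value. 5381 is the classic DJB2 seed.
   uint32_t djb2_xor(const unsigned char *data, size_t len, uint32_t state = 5381)
   {
       for (size_t i = 0; i < len; i++)
           state = ((state << 5) + state) ^ data[i];
       return state;
   }

   // The ADD variant described later replaces the XOR with an addition:
   //   state = ((state << 5) + state) + value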

Hash function quality is multi-faceted and highly dependent on what is being
hashed. Yosys isn't concerned with any cryptographic qualities; instead the goal
is to minimize the total hash collision risk given the data patterns within
Yosys. In general, a good hash function folds values into a state accumulator
with a mathematical function that is fast to compute and has some beneficial
properties. One of these is the avalanche property, which demands that a small
change in the input, such as flipping a bit or incrementing by one, produces a
large, unpredictable change in the output. Additionally, the bit independence
criterion states that any pair of output bits should change independently when
any single input bit is inverted. These properties are important for avoiding
hash collisions on the data patterns Yosys produces: for example, the hash of a
sequence should not collide with the hash of its permutation, and the
information added to the state by hashing preceding elements should not be lost.

DJB2 lacks these properties. Instead, since Yosys hashes large numbers of data
structures composed of incrementing integer IDs, Yosys abuses the predictability
of DJB2 to get lower hash collisions, with the regular nature of the hashes
surviving the interaction with the "modulo prime" operations in the associative
data structures. For example, some of the most common objects in Yosys are
interned ``IdString``\ s with incrementing indices, or ``SigBit``\ s with bit
offsets into a wire (represented by its unique ``IdString`` name). This is what
makes DJB2 a local optimum. Additionally, the ADD version of DJB2 (like above
but with addition instead of XOR) is used to this end for some types,
abandoning the general pattern of folding values into a state value.

Member: Is it to get lower hash collisions or to get better locality?

Collaborator (author): Hash collisions

Member: Does this come from observations, or something Claire mentioned was intention? I know some of the primitives were used in a way to get better locality

Collaborator (author): This comes from my observations when counting hash collisions in hashlib per std::source_location and clear correlation with extra runtime overhead in some opt and extract_fa passes where the collisions were happening

Making a type hashable
~~~~~~~~~~~~~~~~~~~~~~

Let's first take a look at the external interface on a simplified level.
Generally, to get the hash for ``T obj``, you would call the utility function
``run_hash<T>(const T& obj)``, corresponding to ``hash_top_ops<T>::hash(obj)``,
the default implementation of which is ``hash_ops<T>::hash_eat(Hasher(), obj)``.
Member: I much prefer hash_into for the name of the method here, while .eat stays being the method on the Hasher

``Hasher`` is the class that actually implements the hash function. It hides
its initialized internal state and hands the result out through ``hash_t
yield()``, possibly after some finalization steps.
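
For example, to hash a single object outside of any container (the wire name
here is made up for illustration):

.. code-block:: cpp

   IdString name = "\\some_wire";
   Hasher::hash_t h = run_hash(name); // same as hash_top_ops<IdString>::hash(name)
   log("hash of %s: %llu\n", log_id(name), (unsigned long long) h);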

``hash_ops<T>`` is the star of the show. By default it pulls the ``Hasher h``
through a ``Hasher T::hash_eat(Hasher h)`` method. That's the method you have to
implement to make a record (class or struct) type easily hashable with Yosys
hashlib associative data structures.

``hash_ops<T>`` is specialized for built-in types like ``int`` or ``bool`` and
treats pointers the same as integers, so it doesn't dereference pointers. Since
many RTLIL data structures like ``RTLIL::Wire`` carry their own unique index
``Hasher::hash_t hashidx_;``, there are specializations for ``hash_ops<Wire*>``
and others in ``kernel/hashlib.h`` that actually dereference the pointers and
call ``hash_eat`` on the instances pointed to.
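
Conceptually, such a dereferencing specialization does something like the
following. This is a rough sketch, not the actual definition; the type
``MyIndexed`` is made up, and the real specializations in ``kernel/hashlib.h``
also provide the comparison operations the containers need:

.. code-block:: cpp

   // Hypothetical indexed type carrying its own unique index, as RTLIL::Wire does.
   struct MyIndexed {
       Hasher::hash_t hashidx_;
       Hasher hash_eat(Hasher h) const { h.eat(hashidx_); return h; }
   };

   // Sketch only: hash the pointed-to object instead of the pointer value.
   template<>
   struct hash_ops<MyIndexed*> {
       static Hasher hash_eat(Hasher h, const MyIndexed *p) {
           return p->hash_eat(h);
       }
   };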

``hash_ops<T>`` is also specialized for simple compound types like
``std::pair<U, V>`` by calling ``hash_eat`` in sequence on their members. For
flexibly sized containers like ``std::vector<U>``, the size of the container is
hashed first. That is also how hashing for a custom record data type should be
implemented: unless there is a strong reason to do otherwise, call ``h.eat(m)``
on the ``Hasher h`` you have received for each member ``m`` in sequence and
``return h;``. If you do have a strong reason to do otherwise, look at how
``hash_top_ops<RTLIL::SigBit>`` is implemented in ``kernel/rtlil.h``.
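
For example, a hypothetical record type (``MyConnection`` is made up for
illustration) only needs to fold its members in a fixed order; the containers
additionally use ``operator==`` for key comparison:

.. code-block:: cpp

   struct MyConnection {
       RTLIL::IdString port;
       RTLIL::SigBit bit;
       int priority;

       Hasher hash_eat(Hasher h) const {
           // Fold each member into the hasher state, in sequence.
           h.eat(port);
           h.eat(bit);
           h.eat(priority);
           return h;
       }

       bool operator==(const MyConnection &other) const {
           return port == other.port && bit == other.bit && priority == other.priority;
       }
   };

With this in place, ``MyConnection`` can be used directly as a key in a
``dict`` or as an element of a ``pool``.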

Porting plugins from the legacy interface
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Previously, the interface for implementing hashing on custom types was just
``unsigned int T::hash() const``. This meant hashes for members were computed
independently and then combined ad hoc with the hash function, with some
xorshift operations thrown in to mix the bits together somewhat. A plugin can
stay compatible with versions both before and after the break by implementing
the aforementioned current interface and redirecting the legacy one:

``void Hasher::eat(const T& t)`` hashes ``t`` into its internal state by
redirecting to ``hash_ops<T>``.
Member: Is this done for legacy reasons? I am not clear on why this paragraph is below "Porting plugins from the legacy interface"

.. code-block:: cpp
   :caption: Example hash compatibility wrapper
   :name: hash_plugin_compat

   inline unsigned int T::hash() const {
       // Forward the legacy entry point to the new Hasher-based interface.
       Hasher h;
       return (unsigned int)hash_eat(h).yield();
   }

Member: I don't understand, if I compile my plugin against v0.47 or earlier I won't have Hasher

To get hashes for Yosys types, you can temporarily use the deprecated templated
``mkhash`` function and live with the deprecation warnings until the majority
of your plugin's users have switched to a newer version, or set up a custom
``#ifdef``-based solution if you really need to. Feel free to contact the Yosys
maintainers with related issues.
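
A hypothetical ``#ifdef``-based arrangement could look like this, with
``MY_PLUGIN_NEW_HASH_API`` being a macro defined by the plugin's own build
system (it is not provided by Yosys):

.. code-block:: cpp

   struct MyType {
       int a, b;

   #ifdef MY_PLUGIN_NEW_HASH_API
       // Building against a Yosys with the Hasher-based interface.
       Hasher hash_eat(Hasher h) const { h.eat(a); h.eat(b); return h; }
   #else
       // Building against an older Yosys: legacy single-value hash,
       // combining the members with the legacy mkhash helper.
       unsigned int hash() const { return mkhash(a, b); }
   #endif
   };
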
1 change: 1 addition & 0 deletions docs/source/yosys_internals/index.rst
@@ -38,3 +38,4 @@ as reference to implement a similar system in any language.
formats/index
extending_yosys/index
techmap
hashing
2 changes: 1 addition & 1 deletion examples/cxx-api/scopeinfo_example.cc
@@ -90,7 +90,7 @@ struct ScopeinfoExamplePass : public Pass {

// Shuffle wires so this example produces more interesting outputs
std::sort(wires.begin(), wires.end(), [](Wire *a, Wire *b) {
return mkhash_xorshift(a->name.hash() * 0x2c9277b5) < mkhash_xorshift(b->name.hash() * 0x2c9277b5);
return mkhash_xorshift(run_hash(a->name) * 0x2c9277b5) < mkhash_xorshift(run_hash(b->name) * 0x2c9277b5);
});

ModuleHdlnameIndex index(module);
2 changes: 1 addition & 1 deletion frontends/ast/ast.h
@@ -177,7 +177,7 @@ namespace AST
{
// for dict<> and pool<>
unsigned int hashidx_;
unsigned int hash() const { return hashidx_; }
Hasher hash_eat(Hasher h) const { h.eat(hashidx_); return h; }

// this nodes type
AstNodeType type;
16 changes: 8 additions & 8 deletions frontends/verific/verific.cc
@@ -611,7 +611,7 @@ RTLIL::SigSpec VerificImporter::operatorInportCase(Instance *inst, const char *p
}
}

RTLIL::SigSpec VerificImporter::operatorOutput(Instance *inst, const pool<Net*, hash_ptr_ops> *any_all_nets)
RTLIL::SigSpec VerificImporter::operatorOutput(Instance *inst, const pool<Net*> *any_all_nets)
{
RTLIL::SigSpec sig;
RTLIL::Wire *dummy_wire = NULL;
@@ -1567,9 +1567,9 @@ void VerificImporter::import_netlist(RTLIL::Design *design, Netlist *nl, std::ma

module->fixup_ports();

dict<Net*, char, hash_ptr_ops> init_nets;
pool<Net*, hash_ptr_ops> anyconst_nets, anyseq_nets;
pool<Net*, hash_ptr_ops> allconst_nets, allseq_nets;
dict<Net*, char> init_nets;
pool<Net*> anyconst_nets, anyseq_nets;
pool<Net*> allconst_nets, allseq_nets;
any_all_nets.clear();

FOREACH_NET_OF_NETLIST(nl, mi, net)
@@ -1832,10 +1832,10 @@ void VerificImporter::import_netlist(RTLIL::Design *design, Netlist *nl, std::ma
module->connect(net_map_at(net), module->Anyseq(new_verific_id(net)));

#ifdef VERIFIC_SYSTEMVERILOG_SUPPORT
pool<Instance*, hash_ptr_ops> sva_asserts;
pool<Instance*, hash_ptr_ops> sva_assumes;
pool<Instance*, hash_ptr_ops> sva_covers;
pool<Instance*, hash_ptr_ops> sva_triggers;
pool<Instance*> sva_asserts;
pool<Instance*> sva_assumes;
pool<Instance*> sva_covers;
pool<Instance*> sva_triggers;
#endif

pool<RTLIL::Cell*> past_ffs;
4 changes: 2 additions & 2 deletions frontends/verific/verific.h
@@ -71,7 +71,7 @@ struct VerificImporter

std::map<Verific::Net*, RTLIL::SigBit> net_map;
std::map<Verific::Net*, Verific::Net*> sva_posedge_map;
pool<Verific::Net*, hash_ptr_ops> any_all_nets;
pool<Verific::Net*> any_all_nets;

bool mode_gates, mode_keep, mode_nosva, mode_names, mode_verific;
bool mode_autocover, mode_fullinit;
@@ -89,7 +89,7 @@ struct VerificImporter
RTLIL::SigSpec operatorInput2(Verific::Instance *inst);
RTLIL::SigSpec operatorInport(Verific::Instance *inst, const char *portname);
RTLIL::SigSpec operatorInportCase(Verific::Instance *inst, const char *portname);
RTLIL::SigSpec operatorOutput(Verific::Instance *inst, const pool<Verific::Net*, hash_ptr_ops> *any_all_nets = nullptr);
RTLIL::SigSpec operatorOutput(Verific::Instance *inst, const pool<Verific::Net*> *any_all_nets = nullptr);

bool import_netlist_instance_gates(Verific::Instance *inst, RTLIL::IdString inst_name);
bool import_netlist_instance_cells(Verific::Instance *inst, RTLIL::IdString inst_name);
2 changes: 1 addition & 1 deletion frontends/verific/verificsva.cc
@@ -1051,7 +1051,7 @@ struct VerificSvaImporter
msg.c_str(), inst->View()->Owner()->Name(), inst->Name()), inst->Linefile());
}

dict<Net*, bool, hash_ptr_ops> check_expression_cache;
dict<Net*, bool> check_expression_cache;

bool check_expression(Net *net, bool raise_error = false)
{
56 changes: 7 additions & 49 deletions guidelines/GettingStarted
@@ -37,57 +37,15 @@ And then executed using the following command:
Yosys Data Structures
---------------------

Here is a short list of data structures that you should make yourself familiar
with before you write C++ code for Yosys. The following data structures are all
defined when "kernel/yosys.h" is included and USING_YOSYS_NAMESPACE is used.
1. Container classes based on hashing

1. Yosys Container Classes
Yosys heavily relies on custom container data structures such as dict or pool
defined in kernel/hashlib.h.
dict<K, T> is essentially a replacement for std::unordered_map<K, T>
and pool<T> is a replacement for std::unordered_set<T>. Please refer to
docs/source/yosys_internals/hashing.rst for more information on those.

Yosys uses dict<K, T> and pool<T> as main container classes. dict<K, T> is
essentially a replacement for std::unordered_map<K, T> and pool<T> is a
replacement for std::unordered_set<T>. The main characteristics are:

- dict<K, T> and pool<T> are about 2x faster than the std containers

- references to elements in a dict<K, T> or pool<T> are invalidated by
insert and remove operations (similar to std::vector<T> on push_back()).

- some iterators are invalidated by erase(). specifically, iterators
that have not passed the erased element yet are invalidated. (erase()
itself returns valid iterator to the next element.)

- no iterators are invalidated by insert(). elements are inserted at
begin(). i.e. only a new iterator that starts at begin() will see the
inserted elements.

- the method .count(key, iterator) is like .count(key) but only
considers elements that can be reached via the iterator.

- iterators can be compared. it1 < it2 means that the position of t2
can be reached via t1 but not vice versa.

- the method .sort() can be used to sort the elements in the container
the container stays sorted until elements are added or removed.

- dict<K, T> and pool<T> will have the same order of iteration across
all compilers, standard libraries and architectures.

In addition to dict<K, T> and pool<T> there is also an idict<K> that
creates a bijective map from K to the integers. For example:

idict<string, 42> si;
log("%d\n", si("hello")); // will print 42
log("%d\n", si("world")); // will print 43
log("%d\n", si.at("world")); // will print 43
log("%d\n", si.at("dummy")); // will throw exception
log("%s\n", si[42].c_str())); // will print hello
log("%s\n", si[43].c_str())); // will print world
log("%s\n", si[44].c_str())); // will throw exception

It is not possible to remove elements from an idict.

Finally mfp<K> implements a merge-find set data structure (aka. disjoint-set or
union-find) over the type K ("mfp" = merge-find-promote).
Otherwise, Yosys makes use of the following:

2. Standard STL data types
