Skip to content

Commit

Permalink
Switch to hasher based hashing.
Browse files Browse the repository at this point in the history
Signed-off-by: Maxim Zaks <[email protected]>
  • Loading branch information
mzaks committed Oct 22, 2024
1 parent 92e2230 commit cacafd2
Show file tree
Hide file tree
Showing 29 changed files with 1,169 additions and 1,174 deletions.
12 changes: 1 addition & 11 deletions stdlib/src/builtin/dtype.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ These are Mojo built-ins, so you don't need to import them.
"""

from collections import KeyElement
from hashlib._hasher import _HashableWithHasher, _Hasher
from sys import sizeof, bitwidthof, os_is_windows

alias _mIsSigned = UInt8(1)
Expand All @@ -33,7 +32,6 @@ struct DType(
Representable,
KeyElement,
CollectionElementNew,
_HashableWithHasher,
):
"""Represents DType and provides methods for working with it."""

Expand Down Expand Up @@ -299,15 +297,7 @@ struct DType(
self._as_i8(), rhs._as_i8()
)

fn __hash__(self) -> UInt:
"""Return a 64-bit hash for this `DType` value.
Returns:
A 64-bit integer hash of this `DType` value.
"""
return hash(UInt8(self._as_i8()))

fn __hash__[H: _Hasher](self, inout hasher: H):
fn __hash__[H: Hasher](self, inout hasher: H):
"""Updates hasher with this `DType` value.
Parameters:
Expand Down
16 changes: 1 addition & 15 deletions stdlib/src/builtin/int.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ from collections import KeyElement

from builtin._documentation import doc_private
from builtin._math import Ceilable, CeilDivable, Floorable, Truncable
from hashlib.hash import _hash_simd
from hashlib._hasher import _HashableWithHasher, _Hasher
from builtin.io import _snprintf
from collections.string import (
_calc_initial_buffer_size_int32,
Expand Down Expand Up @@ -287,7 +285,6 @@ struct Int(
KeyElement,
Roundable,
IntLike,
_HashableWithHasher,
):
"""This type represents an integer value."""

Expand Down Expand Up @@ -1103,18 +1100,7 @@ struct Int(
"""
return str(self)

fn __hash__(self) -> UInt:
"""Hash the int using builtin hash.
Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
# TODO(MOCO-636): switch to DType.index
return _hash_simd(Scalar[DType.int64](self))

fn __hash__[H: _Hasher](self, inout hasher: H):
fn __hash__[H: Hasher](self, inout hasher: H):
"""Updates hasher with this int value.
Parameters:
Expand Down
15 changes: 1 addition & 14 deletions stdlib/src/builtin/simd.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ from bit import pop_count
from builtin._documentation import doc_private
from builtin._math import Ceilable, CeilDivable, Floorable, Truncable
from builtin.dtype import _uint_type_of_width
from hashlib.hash import _hash_simd
from hashlib._hasher import _HashableWithHasher, _Hasher
from builtin.format_int import _try_write_int
from collections import InlineArray
from memory import bitcast, UnsafePointer
Expand Down Expand Up @@ -177,7 +175,6 @@ struct SIMD[type: DType, size: Int](
Floorable,
Writable,
Hashable,
_HashableWithHasher,
Intable,
Powable,
Representable,
Expand Down Expand Up @@ -1536,17 +1533,7 @@ struct SIMD[type: DType, size: Int](
# TODO: see how we can implement this.
return llvm_intrinsic["llvm.round", Self, has_side_effect=False](self)

fn __hash__(self) -> UInt:
"""Hash the value using builtin hash.
Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
return _hash_simd(self)

fn __hash__[H: _Hasher](self, inout hasher: H):
fn __hash__[H: Hasher](self, inout hasher: H):
"""Updates hasher with this SIMD value.
Parameters:
Expand Down
15 changes: 2 additions & 13 deletions stdlib/src/builtin/string_literal.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ from sys.ffi import c_char

from memory import memcpy, UnsafePointer
from collections import List
from hashlib._hasher import _HashableWithHasher, _Hasher
from hashlib.hasher import Hashable, Hasher
from utils import StringRef, Span, StringSlice, StaticString
from utils import Writable, Writer
from utils._visualizers import lldb_formatter_wrapping_type
Expand Down Expand Up @@ -49,7 +49,6 @@ struct StringLiteral(
Stringable,
FloatableRaising,
BytesCollectionElement,
_HashableWithHasher,
):
"""This type represents a string literal.
Expand Down Expand Up @@ -264,17 +263,7 @@ struct StringLiteral(
"""
return self.__str__().__repr__()

fn __hash__(self) -> UInt:
"""Hash the underlying buffer using builtin hash.
Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
return hash(self.unsafe_ptr(), len(self))

fn __hash__[H: _Hasher](self, inout hasher: H):
fn __hash__[H: Hasher](self, inout hasher: H):
"""Updates hasher with the underlying bytes.
Parameters:
Expand Down
17 changes: 2 additions & 15 deletions stdlib/src/builtin/uint.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,12 @@ These are Mojo built-ins, so you don't need to import them.
from sys import bitwidthof
from utils._visualizers import lldb_formatter_wrapping_type
from builtin._documentation import doc_private
from hashlib.hash import _hash_simd
from hashlib._hasher import _HashableWithHasher, _Hasher


@lldb_formatter_wrapping_type
@value
@register_passable("trivial")
struct UInt(IntLike, _HashableWithHasher):
struct UInt(IntLike, Hashable):
"""This type represents an unsigned integer.
An unsigned integer represents a non-negative integral number.
Expand Down Expand Up @@ -151,18 +149,7 @@ struct UInt(IntLike, _HashableWithHasher):
"""
return "UInt(" + str(self) + ")"

fn __hash__(self) -> UInt:
"""Hash the UInt using builtin hash.
Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
# TODO(MOCO-636): switch to DType.index
return _hash_simd(Scalar[DType.uint64](self))

fn __hash__[H: _Hasher](self, inout hasher: H):
fn __hash__[H: Hasher](self, inout hasher: H):
"""Updates hasher with this uint value.
Parameters:
Expand Down
26 changes: 13 additions & 13 deletions stdlib/src/collections/dict.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ struct DictEntry[K: KeyElement, V: CollectionElement](
V: The value type of the dict.
"""

var hash: Int
var hash: UInt64
"""`key.__hash__()`, stored so hashing isn't re-computed during dict lookup."""
var key: K
"""The unique key for the entry."""
Expand Down Expand Up @@ -315,7 +315,7 @@ struct _DictIndex:
fn __moveinit__(inout self, owned existing: Self):
self.data = existing.data

fn get_index(self, reserved: Int, slot: Int) -> Int:
fn get_index(self, reserved: Int, slot: UInt64) -> Int:
if reserved <= 128:
var data = self.data.bitcast[Int8]()
return int(data.load(slot & (reserved - 1)))
Expand All @@ -329,7 +329,7 @@ struct _DictIndex:
var data = self.data.bitcast[Int64]()
return int(data.load(slot & (reserved - 1)))

fn set_index(inout self, reserved: Int, slot: Int, value: Int):
fn set_index(inout self, reserved: Int, slot: UInt64, value: Int):
if reserved <= 128:
var data = self.data.bitcast[Int8]()
return data.store(slot & (reserved - 1), value)
Expand Down Expand Up @@ -770,7 +770,7 @@ struct Dict[K: KeyElement, V: CollectionElement](
"""
var hash = hash(key)
var found: Bool
var slot: Int
var slot: UInt64
var index: Int
found, slot, index = self._find_index(hash, key)
if found:
Expand Down Expand Up @@ -835,7 +835,7 @@ struct Dict[K: KeyElement, V: CollectionElement](
"""
var hash = hash(key)
var found: Bool
var slot: Int
var slot: UInt64
var index: Int
found, slot, index = self._find_index(hash, key)
if found:
Expand Down Expand Up @@ -962,7 +962,7 @@ struct Dict[K: KeyElement, V: CollectionElement](
if not safe_context:
self._maybe_resize()
var found: Bool
var slot: Int
var slot: UInt64
var index: Int
found, slot, index = self._find_index(entry.hash, entry.key)

Expand All @@ -972,30 +972,30 @@ struct Dict[K: KeyElement, V: CollectionElement](
self.size += 1
self._n_entries += 1

fn _get_index(self, slot: Int) -> Int:
fn _get_index(self, slot: UInt64) -> Int:
return self._index.get_index(self._reserved(), slot)

fn _set_index(inout self, slot: Int, index: Int):
fn _set_index(inout self, slot: UInt64, index: Int):
return self._index.set_index(self._reserved(), slot, index)

fn _next_index_slot(self, inout slot: Int, inout perturb: UInt64):
fn _next_index_slot(self, inout slot: UInt64, inout perturb: UInt64):
alias PERTURB_SHIFT = 5
perturb >>= PERTURB_SHIFT
slot = ((5 * slot) + int(perturb + 1)) & (self._reserved() - 1)

fn _find_empty_index(self, hash: Int) -> Int:
fn _find_empty_index(self, hash: UInt64) -> UInt64:
var slot = hash & (self._reserved() - 1)
var perturb = bitcast[DType.uint64](Int64(hash))
var perturb = hash
while True:
var index = self._get_index(slot)
if index == Self.EMPTY:
return slot
self._next_index_slot(slot, perturb)

fn _find_index(self, hash: Int, key: K) -> (Bool, Int, Int):
fn _find_index(self, hash: UInt64, key: K) -> (Bool, UInt64, Int):
# Return (found, slot, index)
var slot = hash & (self._reserved() - 1)
var perturb = bitcast[DType.uint64](Int64(hash))
var perturb = hash
while True:
var index = self._get_index(slot)
if index == Self.EMPTY:
Expand Down
17 changes: 1 addition & 16 deletions stdlib/src/collections/set.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ from .dict import (
)


struct Set[T: KeyElement](Sized, Comparable, Hashable, Boolable):
struct Set[T: KeyElement](Sized, Comparable, Boolable):
"""A set data type.
O(1) average-case amortized add, remove, and membership check.
Expand Down Expand Up @@ -287,21 +287,6 @@ struct Set[T: KeyElement](Sized, Comparable, Hashable, Boolable):
"""
return len(self._data)

fn __hash__(self) -> UInt:
"""A hash value of the elements in the set.
The hash value is order independent, so s1 == s2 -> hash(s1) == hash(s2).
Returns:
A hash value of the set suitable for non-cryptographic purposes.
"""
var hash_value = 0
# Hash combination needs to be commutative so iteration order
# doesn't impact the hash value.
for e in self:
hash_value ^= hash(e[])
return hash_value

@no_inline
fn __str__[U: RepresentableKeyElement](self: Set[U]) -> String:
"""Returns the string representation of the set.
Expand Down
14 changes: 1 addition & 13 deletions stdlib/src/collections/string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ from memory import UnsafePointer, memcmp, memcpy
from python import PythonObject

from sys.intrinsics import _type_is_eq
from hashlib._hasher import _HashableWithHasher, _Hasher

from utils import (
Span,
Expand Down Expand Up @@ -704,7 +703,6 @@ struct String(
Writer,
CollectionElementNew,
FloatableRaising,
_HashableWithHasher,
):
"""Represents a mutable string."""

Expand Down Expand Up @@ -1949,17 +1947,7 @@ struct String(
l_idx += 1
return self[l_idx:]

fn __hash__(self) -> UInt:
"""Hash the underlying buffer using builtin hash.
Returns:
A 64-bit hash value. This value is _not_ suitable for cryptographic
uses. Its intended usage is for data structures. See the `hash`
builtin documentation for more details.
"""
return hash(self.as_string_slice())

fn __hash__[H: _Hasher](self, inout hasher: H):
fn __hash__[H: Hasher](self, inout hasher: H):
"""Updates hasher with the underlying bytes.
Parameters:
Expand Down
2 changes: 1 addition & 1 deletion stdlib/src/hashlib/__init__.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
# limitations under the License.
# ===----------------------------------------------------------------------=== #
"""Implements the hashlib package that provides various hash algorithms."""
from .hash import hash, Hashable
from .hash import hash
Loading

0 comments on commit cacafd2

Please sign in to comment.