Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-89083: support UUID version 7 (monotonous version) (RFC 9562) [abandoned proposal] #120830

Closed
wants to merge 15 commits into from
15 changes: 14 additions & 1 deletion Doc/library/uuid.rst
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,12 @@ which relays any information about the UUID's safety, using this enumeration:

.. attribute:: UUID.version

The UUID version number (1 through 5, meaningful only when the variant is
The UUID version number (1 through 7, meaningful only when the variant is
:const:`RFC_4122`).

.. versionchanged:: 3.14
Added UUID version 7.

.. attribute:: UUID.is_safe

An enumeration of :class:`SafeUUID` which indicates whether the platform
Expand Down Expand Up @@ -216,6 +219,16 @@ The :mod:`uuid` module defines the following functions:

.. index:: single: uuid5


.. function:: uuid7()

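Generate a UUID from a Unix timestamp in milliseconds and random bits, as
specified by :rfc:`9562`. UUIDs returned by successive calls are monotonic
within a millisecond.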

.. versionadded:: 3.14

.. index:: single: uuid7


The :mod:`uuid` module defines the following namespace identifiers for use with
:func:`uuid3` or :func:`uuid5`.

Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ symtable

(Contributed by Bénédikt Tran in :gh:`120029`.)

uuid
----

* Add :func:`uuid.uuid7` for UUID version 7 as specified by :rfc:`9562`.

(Contributed by Bénédikt Tran in :gh:`89083`.)

Optimizations
=============
Expand Down
160 changes: 157 additions & 3 deletions Lib/test/test_uuid.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import random
import unittest
from test import support
from test.support import import_helper
Expand All @@ -10,6 +11,7 @@
import pickle
import sys
import weakref
from itertools import product
from unittest import mock

py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid'])
Expand Down Expand Up @@ -267,7 +269,7 @@ def test_exceptions(self):

# Version number out of range.
badvalue(lambda: self.uuid.UUID('00'*16, version=0))
badvalue(lambda: self.uuid.UUID('00'*16, version=6))
badvalue(lambda: self.uuid.UUID('00'*16, version=42))

# Integer value out of range.
badvalue(lambda: self.uuid.UUID(int=-1))
Expand Down Expand Up @@ -588,15 +590,15 @@ def test_uuid1_bogus_return_value(self):

def test_uuid1_time(self):
with mock.patch.object(self.uuid, '_generate_time_safe', None), \
mock.patch.object(self.uuid, '_last_timestamp', None), \
mock.patch.object(self.uuid, '_last_timestamp_v1', None), \
mock.patch.object(self.uuid, 'getnode', return_value=93328246233727), \
mock.patch('time.time_ns', return_value=1545052026752910643), \
mock.patch('random.getrandbits', return_value=5317): # guaranteed to be random
u = self.uuid.uuid1()
self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f'))

with mock.patch.object(self.uuid, '_generate_time_safe', None), \
mock.patch.object(self.uuid, '_last_timestamp', None), \
mock.patch.object(self.uuid, '_last_timestamp_v1', None), \
mock.patch('time.time_ns', return_value=1545052026752910643):
u = self.uuid.uuid1(node=93328246233727, clock_seq=5317)
self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f'))
Expand Down Expand Up @@ -681,6 +683,158 @@ def test_uuid5(self):
equal(u, self.uuid.UUID(v))
equal(str(u), v)

def test_uuid7(self):
    # Checks the variant/version of a generated UUIDv7 and, under a frozen
    # clock and fixed random bytes, the exact layout of its three fields.
    check = self.assertEqual
    mask_48 = (1 << 48) - 1
    mask_62 = (1 << 62) - 1

    generated = self.uuid.uuid7()
    check(generated.variant, self.uuid.RFC_4122)
    check(generated.version, 7)

    # 2023-01-01 00:34:56.123456789 UTC, expressed in nanoseconds.
    frozen_ns = 1672533296_123_456_789
    frozen_ms = frozen_ns // 1_000_000
    seed_bytes = os.urandom(8)  # 64 random bits, drawn before patching
    with (
        mock.patch.object(self.uuid, '_last_timestamp_v7', None),
        mock.patch.object(self.uuid, '_last_counter_v7_a', 0),
        mock.patch.object(self.uuid, '_last_counter_v7_b', 0),
        mock.patch('time.time_ns', return_value=frozen_ns),
        mock.patch('os.urandom', return_value=seed_bytes),
    ):
        generated = self.uuid.uuid7()
        check(generated.variant, self.uuid.RFC_4122)
        check(generated.version, 7)
        # The module remembers the millisecond it just used.
        check(self.uuid._last_timestamp_v7, frozen_ms)
        # Top 48 bits: Unix timestamp in milliseconds.
        check((generated.int >> 80) & mask_48, frozen_ms & mask_48)
        # 12 bits below the version nibble: int(0.456789 * 4096) == 1871.
        check((generated.int >> 64) & 0xfff, 1871)
        # Low 62 bits: the mocked random bytes, truncated to 62 bits.
        want_rand_b = int.from_bytes(seed_bytes) & mask_62
        check(generated.int & mask_62, want_rand_b)

def test_uuid7_monotonicity(self):
    # UUIDs generated back-to-back must already be sorted, and two UUIDs
    # generated within the same (mocked) millisecond must be ordered by
    # their sub-millisecond part and counter.
    check = self.assertEqual
    mask_62 = (1 << 62) - 1

    batch = [self.uuid.uuid7() for _ in range(10_000)]
    check(batch, sorted(batch))

    with mock.patch.multiple(self.uuid, _last_counter_v7_a=0, _last_counter_v7_b=0):
        # 2023-01-01 00:34:56.123456789 UTC, expressed in nanoseconds.
        first_ns = 1672533296_123_456_789
        # Same millisecond, slightly later sub-millisecond reading.
        second_ns = 1672533296_123_457_032
        shared_ms = first_ns // 1_000_000

        # (clock reading, expected rand_a, expected rand_b)
        steps = [
            (first_ns, 1871, 1),   # 1871 == int(0.456_789 * 4096)
            (second_ns, 1872, 2),  # 1872 == int(0.457_032 * 4096)
        ]
        produced = []
        with mock.patch.object(self.uuid, '_last_timestamp_v7', shared_ms):
            for clock_ns, want_a, want_b in steps:
                with (
                    mock.patch('time.time_ns', return_value=clock_ns),
                    mock.patch('os.urandom', return_value=b'\x01') as urandom_mock,
                ):
                    current = self.uuid.uuid7()
                    # Only the 4-byte counter increment is drawn.
                    urandom_mock.assert_called_once_with(4)
                    check(self.uuid._last_counter_v7_a, want_a)
                    check((current.int >> 64) & 0xfff, want_a)
                    check(self.uuid._last_counter_v7_b, want_b)
                    check(current.int & mask_62, want_b)
                produced.append(current)

            earlier, later = produced
            self.assertLess(earlier, later)
            # The 48-bit time component is the same for both.
            self.assertEqual(earlier.int >> 80, later.int >> 80)

def test_uuid7_timestamp_backwards(self):
    # The mocked clock reads one millisecond *before* the recorded last
    # timestamp; the generated UUID must use "last + 1" as its timestamp
    # and continue the rand_b counter from its previous value.
    check = self.assertEqual
    mask_48 = (1 << 48) - 1
    mask_62 = (1 << 62) - 1

    # 2023-01-01 00:34:56.123456789 UTC, expressed in nanoseconds.
    clock_ns = 1672533296_123_456_789
    clock_ms = clock_ns // 1_000_000
    recorded_ms = clock_ms + 1
    recorded_rand_b = 123456
    with (
        mock.patch.object(self.uuid, '_last_timestamp_v7', recorded_ms),
        mock.patch.object(self.uuid, '_last_counter_v7_a', 0),
        mock.patch.object(self.uuid, '_last_counter_v7_b', recorded_rand_b),
        mock.patch('time.time_ns', return_value=clock_ns),
        mock.patch('os.urandom', return_value=b'\x00\x00\x00\x01') as urandom_mock,
    ):
        generated = self.uuid.uuid7()
        urandom_mock.assert_called_once()
        check(generated.variant, self.uuid.RFC_4122)
        check(generated.version, 7)

        bumped_ms = recorded_ms + 1
        check(self.uuid._last_timestamp_v7, bumped_ms)
        check((generated.int >> 80) & mask_48, bumped_ms & mask_48)

        want_rand_a = 1871  # == int(0.456789 * 4096)
        check(self.uuid._last_counter_v7_a, want_rand_a)
        check((generated.int >> 64) & 0xfff, want_rand_a)

        # The mocked os.urandom(4) decodes to an increment of 1.
        want_rand_b = recorded_rand_b + 1
        check(self.uuid._last_counter_v7_b, want_rand_b)
        check(generated.int & mask_62, want_rand_b)

def test_uuid7_overflow_rand_b(self):
    # Same millisecond as the recorded one, with the stored rand_b counter
    # already past its 62-bit range: rand_b must be regenerated from the
    # random source and rand_a advanced by one (rand_a itself does not
    # overflow here).
    check = self.assertEqual
    mask_48 = (1 << 48) - 1
    mask_62 = (1 << 62) - 1

    # 2023-01-01 00:34:56.123456789 UTC, expressed in nanoseconds.
    clock_ns = 1672533296_123_456_789
    clock_ms = clock_ns // 1_000_000
    fresh_bytes = os.urandom(8)  # drawn before os.urandom is patched
    with (
        mock.patch.object(self.uuid, '_last_timestamp_v7', clock_ms),
        mock.patch.object(self.uuid, '_last_counter_v7_a', 0),
        mock.patch.object(self.uuid, '_last_counter_v7_b', 1 << 62),
        mock.patch('time.time_ns', return_value=clock_ns),
        mock.patch('os.urandom', return_value=fresh_bytes),
    ):
        generated = self.uuid.uuid7()
        check(generated.variant, self.uuid.RFC_4122)
        check(generated.version, 7)

        # The timestamp is unchanged.
        check(self.uuid._last_timestamp_v7, clock_ms)
        check((generated.int >> 80) & mask_48, clock_ms & mask_48)

        # int(0.456789 * 4096) == 1871, advanced by 1 because of the overflow.
        want_rand_a = 1871 + 1
        check(self.uuid._last_counter_v7_a, want_rand_a)
        check((generated.int >> 64) & 0xfff, want_rand_a)

        want_rand_b = int.from_bytes(fresh_bytes) & mask_62
        check(self.uuid._last_counter_v7_b, want_rand_b)
        check(generated.int & mask_62, want_rand_b)

def test_uuid7_overflow_rand_a_and_rand_b(self):
    # First clock reading: rand_a is already at its maximum and rand_b
    # overflows, so uuid7() has to start over.  Second clock reading: a
    # later millisecond, which takes the "fresh timestamp" path.
    check = self.assertEqual
    mask_48 = (1 << 48) - 1
    mask_62 = (1 << 62) - 1

    clock_readings = [
        1672533296_123_999_999,  # sub-millisecond part maps to rand_a == 4095
        1704069296_123_456_789,  # later: hits 'timestamp_ms > _last_timestamp_v7'
    ]
    first_ms = clock_readings[0] // 1_000_000
    second_ms = clock_readings[1] // 1_000_000

    urandom_results = [
        b'\xff' * 4,    # counter increment that pushes rand_b over the limit
        os.urandom(8),  # seed for rand_b on the second attempt
    ]
    pending_results = iter(urandom_results)

    def fake_urandom(n):
        # Ignores the requested size; yields None once the script runs out.
        return next(pending_results, None)

    with (
        mock.patch.object(self.uuid, '_last_timestamp_v7', first_ms),
        mock.patch.object(self.uuid, '_last_counter_v7_a', 0),
        mock.patch.object(self.uuid, '_last_counter_v7_b', 1 << 62),
        mock.patch('time.time_ns', iter(clock_readings).__next__),
        mock.patch('os.urandom', fake_urandom),
    ):
        generated = self.uuid.uuid7()
        # Both scripted random results must have been consumed.
        self.assertIsNone(os.urandom(1))
        check(generated.variant, self.uuid.RFC_4122)
        check(generated.version, 7)

        check(self.uuid._last_timestamp_v7, second_ms)
        check((generated.int >> 80) & mask_48, second_ms & mask_48)

        want_rand_a = 1871  # from the second clock reading
        check(self.uuid._last_counter_v7_a, want_rand_a)
        check((generated.int >> 64) & 0xfff, want_rand_a)

        want_rand_b = int.from_bytes(urandom_results[1]) & mask_62
        check(self.uuid._last_counter_v7_b, want_rand_b)
        check(generated.int & mask_62, want_rand_b)

@support.requires_fork()
def testIssue8621(self):
# On at least some versions of OSX self.uuid.uuid4 generates
Expand Down
82 changes: 72 additions & 10 deletions Lib/uuid.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
r"""UUID objects (universally unique identifiers) according to RFC 4122.

This module provides immutable UUID objects (class UUID) and the functions
uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5
UUIDs as specified in RFC 4122.
uuid1(), uuid3(), uuid4(), uuid5(), and uuid7() for generating version 1, 3,
4, 5, and 7 UUIDs as specified in RFC 4122 (superseded by RFC 9562 but still
referred to as RFC 4122 for compatibility purposes).

If all you want is a unique ID, you should probably call uuid1() or uuid4().
Note that uuid1() may compromise privacy since it creates a UUID containing
Expand Down Expand Up @@ -129,7 +130,7 @@ class UUID:
variant the UUID variant (one of the constants RESERVED_NCS,
RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE)

version the UUID version number (1 through 5, meaningful only
version the UUID version number (1, 3, 4, 5 and 7, meaningful only
when the variant is RFC_4122)

is_safe An enum indicating whether the UUID has been generated in
Expand Down Expand Up @@ -214,7 +215,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
if not 0 <= int < 1<<128:
raise ValueError('int is out of range (need a 128-bit value)')
if version is not None:
if not 1 <= version <= 5:
if not 1 <= version <= 7:
raise ValueError('illegal version number')
# Set the variant to RFC 4122.
int &= ~(0xc000 << 48)
Expand Down Expand Up @@ -656,7 +657,7 @@ def getnode():
assert False, '_random_getnode() returned invalid value: {}'.format(_node)


_last_timestamp = None
_last_timestamp_v1 = None

def uuid1(node=None, clock_seq=None):
"""Generate a UUID from a host ID, sequence number, and the current time.
Expand All @@ -674,15 +675,15 @@ def uuid1(node=None, clock_seq=None):
is_safe = SafeUUID.unknown
return UUID(bytes=uuid_time, is_safe=is_safe)

global _last_timestamp
global _last_timestamp_v1
import time
nanoseconds = time.time_ns()
# 0x01b21dd213814000 is the number of 100-ns intervals between the
# UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
timestamp = nanoseconds // 100 + 0x01b21dd213814000
if _last_timestamp is not None and timestamp <= _last_timestamp:
timestamp = _last_timestamp + 1
_last_timestamp = timestamp
if _last_timestamp_v1 is not None and timestamp <= _last_timestamp_v1:
timestamp = _last_timestamp_v1 + 1
_last_timestamp_v1 = timestamp
if clock_seq is None:
import random
clock_seq = random.getrandbits(14) # instead of stable storage
Expand Down Expand Up @@ -719,14 +720,75 @@ def uuid5(namespace, name):
hash = sha1(namespace.bytes + name).digest()
return UUID(bytes=hash[:16], version=5)

# State shared by successive uuid7() calls; it is read and rebound without
# any locking.
_last_timestamp_v7 = None  # millisecond timestamp of the last UUIDv7, if any
_last_counter_v7_a = 0  # 12-bit sub-millisecond precision
_last_counter_v7_b = 0  # 62-bit seeded counter

def uuid7():
    """Generate a UUID from a Unix timestamp in milliseconds and random bits.

    UUIDv7 objects feature monotonicity within a millisecond: every UUID
    returned by this function compares greater than the one returned by the
    previous call in the same process.

    Returns a UUID object whose version is 7.
    """
    # --- 48 ---   -- 4 --   - 12 -   -- 2 --   - 62 -
    # unix_ts_ms | version | rand_a | variant | rand_b
    #
    # 'rand_a' is used for an additional 12-bit sub-millisecond
    # precision constructed with Method 3 of RFC 9562, §6.2.
    #
    # 'rand_b' is a seeded counter generated according to
    # the Method 2 of RFC 9562, §6.2. The initial counter
    # is a random 62-bit integer and the counter is incremented
    # by a random 32-bit integer within the same timestamp tick.
    #
    # If 'rand_b' overflows, it is regenerated and 'rand_a' is
    # advanced by 1. If 'rand_a' also overflows, the clock is
    # read again and the whole procedure is retried.

    global _last_timestamp_v7
    global _last_counter_v7_a
    global _last_counter_v7_b

    import time

    max_rand_a = 0xfff
    max_rand_b = 0x3fffffffffffffff

    def get_rand_b():  # random 62-bit integer
        return int.from_bytes(os.urandom(8)) & max_rand_b

    # A loop rather than a recursive call: when both counters are exhausted
    # we may have to wait for the clock to tick, and the number of retries
    # must not be bounded by the recursion limit.
    while True:
        timestamp_ms, sub_millisecs = divmod(time.time_ns(), 1_000_000)
        # Scale the sub-millisecond part (0 <= sub_millisecs < 1_000_000)
        # to 12 bits using exact integer arithmetic.
        rand_a = (sub_millisecs << 12) // 1_000_000

        if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7:
            # First call, or the clock moved to a later millisecond:
            # reseed the counter.
            rand_b = get_rand_b()
            break

        if timestamp_ms < _last_timestamp_v7:
            # The clock went backwards: keep the timestamp strictly
            # increasing instead of trusting the clock.
            timestamp_ms = _last_timestamp_v7 + 1
        elif rand_a < _last_counter_v7_a:
            # Same millisecond, but the sub-millisecond reading is lower
            # than the value already emitted (the clock stepped back inside
            # the millisecond, or 'rand_a' was advanced after a 'rand_b'
            # overflow).  Never let 'rand_a' decrease, otherwise the new
            # UUID would sort before the previous one.
            rand_a = _last_counter_v7_a

        # advance 'rand_b' by a 32-bit random increment
        rand_b = _last_counter_v7_b + int.from_bytes(os.urandom(4))
        if rand_b <= max_rand_b:
            break
        if rand_a < max_rand_a:
            # 'rand_b' overflowed: carry into 'rand_a' and reseed 'rand_b'.
            rand_a += 1
            rand_b = get_rand_b()
            break
        # Both counters are exhausted for this timestamp; read the clock
        # again (the saved state is left untouched).

    _last_timestamp_v7 = timestamp_ms
    _last_counter_v7_a = rand_a
    _last_counter_v7_b = rand_b

    int_uuid_7 = (timestamp_ms & 0xffffffffffff) << 80
    int_uuid_7 |= rand_a << 64
    int_uuid_7 |= rand_b
    # UUID() sets the version and variant bits itself.
    return UUID(int=int_uuid_7, version=7)


def main():
"""Run the uuid command line interface."""
uuid_funcs = {
"uuid1": uuid1,
"uuid3": uuid3,
"uuid4": uuid4,
"uuid5": uuid5
"uuid5": uuid5,
"uuid7": uuid7,
}
uuid_namespace_funcs = ("uuid3", "uuid5")
namespaces = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add :func:`~uuid.uuid7` to the :mod:`uuid` module as specified by :rfc:`9562`.
Patch by Bénédikt Tran.
Loading