diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 39e82d0e19a9ac..97cb1c97cb7f9d 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -12,8 +12,8 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5`, -and :func:`uuid.uuid8` for generating version 1, 3, 4, 5, and 8 UUIDs as -specified in :rfc:`9562` (which supersedes :rfc:`4122`). +:func:`uuid7`, and :func:`uuid8` for generating version 1, 3, 4, 5, 7, and 8 +UUIDs as specified in :rfc:`9562` (which supersedes :rfc:`4122`). If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -153,8 +153,8 @@ which relays any information about the UUID's safety, using this enumeration: The UUID version number (1 through 8, meaningful only when the variant is :const:`RFC_4122`). - .. versionchanged:: 3.14 - Added UUID version 8. + .. versionchanged:: next + Added UUID versions 7 and 8. .. attribute:: UUID.is_safe @@ -222,6 +222,13 @@ The :mod:`uuid` module defines the following functions: .. index:: single: uuid5 +.. function:: uuid7() + + .. versionadded:: next + +.. index:: single: uuid7 + + .. function:: uuid8(a=None, b=None, c=None) Generate a pseudo-random UUID according to @@ -307,7 +314,7 @@ The :mod:`uuid` module can be executed as a script from the command line. .. code-block:: sh - python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid8}] [-n NAMESPACE] [-N NAME] + python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid7,uuid8}] [-n NAMESPACE] [-N NAME] The following options are accepted: @@ -323,8 +330,8 @@ The following options are accepted: Specify the function name to use to generate the uuid. By default :func:`uuid4` is used. - .. versionadded:: 3.14 - Allow generating UUID version 8. + .. versionchanged:: next + Allow generating UUID versions 7 and 8. .. option:: -n --namespace diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 72abfebd46f2b9..3095502eb8e322 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -682,10 +682,11 @@ urllib uuid ---- -* Add support for UUID version 8 via :func:`uuid.uuid8` as specified - in :rfc:`9562`. +* Add support for UUID versions 7 and 8 via :func:`uuid.uuid7` and + :func:`uuid.uuid8` respectively, as specified in :rfc:`9562`. (Contributed by Bénédikt Tran in :gh:`89083`.) + zipinfo ------- diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 8f40dd97f42fdc..70a3d804bac6ca 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -1,6 +1,3 @@ -import unittest -from test import support -from test.support import import_helper import builtins import contextlib import copy @@ -10,10 +7,14 @@ import pickle import random import sys +import unittest import weakref from itertools import product from unittest import mock +from test import support +from test.support import import_helper + py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid']) c_uuid = import_helper.import_fresh_module('uuid', fresh=['_uuid']) @@ -590,7 +591,7 @@ def test_uuid1_bogus_return_value(self): def test_uuid1_time(self): with mock.patch.object(self.uuid, '_generate_time_safe', None), \ - mock.patch.object(self.uuid, '_last_timestamp', None), \ + mock.patch.object(self.uuid, '_last_timestamp_v1', None), \ mock.patch.object(self.uuid, 'getnode', return_value=93328246233727), \ mock.patch('time.time_ns', return_value=1545052026752910643), \ mock.patch('random.getrandbits', return_value=5317): # guaranteed to be random @@ -598,7 +599,7 @@ def test_uuid1_time(self): self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f')) with mock.patch.object(self.uuid, '_generate_time_safe', None), \ - mock.patch.object(self.uuid, '_last_timestamp', None), \ + mock.patch.object(self.uuid, '_last_timestamp_v1', None), \ mock.patch('time.time_ns', return_value=1545052026752910643): u = self.uuid.uuid1(node=93328246233727, clock_seq=5317) self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f')) @@ -683,6 +684,209 @@ def test_uuid5(self): equal(u, self.uuid.UUID(v)) equal(str(u), v) + def test_uuid7(self): + equal = self.assertEqual + u = self.uuid.uuid7() + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + for _ in range(100): + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(30) + counter = (counter_hi << 30) | counter_lo + + tail = random.getrandbits(32) + # effective number of bits is 32 + 30 + 11 = 73 + random_bits = counter << 32 | tail + + # set all remaining MSB of fake random bits to 1 to ensure that + # the implementation correctly removes them + random_bits = (((1 << 7) - 1) << 73) | random_bits + random_data = random_bits.to_bytes(10) + + with ( + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=None, + _last_counter_v7=0, + ), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u = self.uuid.uuid7() + urand.assert_called_once_with(10) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + + equal(self.uuid._last_timestamp_v7, timestamp_ms) + equal(self.uuid._last_counter_v7, counter) + + unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) + + equal((u.int >> 75) & 1, 0) # check that the MSB is 0 + equal((u.int >> 64) & 0xfff, counter_hi) + equal((u.int >> 32) & 0x3fff_ffff, counter_lo) + equal(u.int & 0xffff_ffff, tail) + + def test_uuid7_uniqueness(self): + # Test that UUIDv7-generated values are unique. + # + # While UUIDv8 has an entropy of 122 bits, those 122 bits may not + # necessarily be sampled from a PRNG. On the other hand, UUIDv7 + # uses os.urandom() as a PRNG which features better randomness. + # + # Until reaching UNIX_EPOCH + 10'000 years, the probability for + # generating two identical UUIDs is negligible. + N = 1000 + uuids = {self.uuid.uuid7() for _ in range(N)} + self.assertEqual(len(uuids), N) + + versions = {u.version for u in uuids} + self.assertSetEqual(versions, {7}) + + def test_uuid7_monotonicity(self): + equal = self.assertEqual + + us = [self.uuid.uuid7() for _ in range(10_000)] + equal(us, sorted(us)) + + with mock.patch.multiple( + self.uuid, + _last_timestamp_v7=0, + _last_counter_v7=0, + ): + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + # counter_{hi,lo} are chosen so that "counter + 1" does not overflow + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(29) + counter = (counter_hi << 30) | counter_lo + self.assertLess(counter + 1, 0x3ff_ffff_ffff) + + tail = random.getrandbits(32) + random_bits = counter << 32 | tail + random_data = random_bits.to_bytes(10) + + with ( + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u1 = self.uuid.uuid7() + urand.assert_called_once_with(10) + equal(self.uuid._last_timestamp_v7, timestamp_ms) + equal(self.uuid._last_counter_v7, counter) + equal((u1.int >> 64) & 0xfff, counter_hi) + equal((u1.int >> 32) & 0x3fff_ffff, counter_lo) + equal(u1.int & 0xffff_ffff, tail) + + # 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same ns) + next_timestamp_ns = 1672533296_123_457_032 + next_timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + equal(timestamp_ms, next_timestamp_ms) + + next_tail_bytes = os.urandom(4) + next_fail = int.from_bytes(next_tail_bytes) + + with ( + mock.patch('time.time_ns', return_value=next_timestamp_ns), + mock.patch('os.urandom', return_value=next_tail_bytes) as urand + ): + u2 = self.uuid.uuid7() + urand.assert_called_once_with(4) + # same milli-second + equal(self.uuid._last_timestamp_v7, timestamp_ms) + # 42-bit counter advanced by 1 + equal(self.uuid._last_counter_v7, counter + 1) + equal((u2.int >> 64) & 0xfff, counter_hi) + equal((u2.int >> 32) & 0x3fff_ffff, counter_lo + 1) + equal(u2.int & 0xffff_ffff, next_fail) + + self.assertLess(u1, u2) + + def test_uuid7_timestamp_backwards(self): + equal = self.assertEqual + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + fake_last_timestamp_v7 = timestamp_ms + 1 + + # counter_{hi,lo} are chosen so that "counter + 1" does not overflow + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(29) + counter = (counter_hi << 30) | counter_lo + self.assertLess(counter + 1, 0x3ff_ffff_ffff) + + tail_bytes = os.urandom(4) + tail = int.from_bytes(tail_bytes) + + with ( + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=fake_last_timestamp_v7, + _last_counter_v7=counter, + ), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=tail_bytes) as urand + ): + u = self.uuid.uuid7() + urand.assert_called_once_with(4) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1) + unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) + # 42-bit counter advanced by 1 + equal(self.uuid._last_counter_v7, counter + 1) + equal((u.int >> 64) & 0xfff, counter_hi) + # 42-bit counter advanced by 1 (counter_hi is untouched) + equal((u.int >> 32) & 0x3fff_ffff, counter_lo + 1) + equal(u.int & 0xffff_ffff, tail) + + def test_uuid7_overflow_counter(self): + equal = self.assertEqual + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + new_counter_hi = random.getrandbits(11) + new_counter_lo = random.getrandbits(30) + new_counter = (new_counter_hi << 30) | new_counter_lo + + tail = random.getrandbits(32) + random_bits = (new_counter << 32) | tail + random_data = random_bits.to_bytes(10) + + with ( + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=timestamp_ms, + # same timestamp, but force an overflow on the counter + _last_counter_v7=0x3ff_ffff_ffff, + ), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u = self.uuid.uuid7() + urand.assert_called_with(10) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + # timestamp advanced due to overflow + equal(self.uuid._last_timestamp_v7, timestamp_ms + 1) + unix_ts_ms = (timestamp_ms + 1) & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) + # counter overflowed, so we picked a new one + equal(self.uuid._last_counter_v7, new_counter) + equal((u.int >> 64) & 0xfff, new_counter_hi) + equal((u.int >> 32) & 0x3fff_ffff, new_counter_lo) + equal(u.int & 0xffff_ffff, tail) + def test_uuid8(self): equal = self.assertEqual u = self.uuid.uuid8() diff --git a/Lib/uuid.py b/Lib/uuid.py index 9c6ad9643cf6d5..f0d9b39c43b7ba 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,8 +1,8 @@ r"""UUID objects (universally unique identifiers) according to RFC 4122/9562. This module provides immutable UUID objects (class UUID) and the functions -uuid1(), uuid3(), uuid4(), uuid5(), and uuid8() for generating version 1, 3, -4, 5, and 8 UUIDs as specified in RFC 4122/9562. +uuid1(), uuid3(), uuid4(), uuid5(), uuid7(), and uuid8() for generating +version 1, 3, 4, 5, 7, and 8 UUIDs as specified in RFC 4122/9562. If all you want is a unique ID, you should probably call uuid1() or uuid4(). Note that uuid1() may compromise privacy since it creates a UUID containing @@ -656,7 +656,7 @@ def getnode(): assert False, '_random_getnode() returned invalid value: {}'.format(_node) -_last_timestamp = None +_last_timestamp_v1 = None def uuid1(node=None, clock_seq=None): """Generate a UUID from a host ID, sequence number, and the current time. @@ -674,15 +674,15 @@ def uuid1(node=None, clock_seq=None): is_safe = SafeUUID.unknown return UUID(bytes=uuid_time, is_safe=is_safe) - global _last_timestamp + global _last_timestamp_v1 import time nanoseconds = time.time_ns() # 0x01b21dd213814000 is the number of 100-ns intervals between the # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00. timestamp = nanoseconds // 100 + 0x01b21dd213814000 - if _last_timestamp is not None and timestamp <= _last_timestamp: - timestamp = _last_timestamp + 1 - _last_timestamp = timestamp + if _last_timestamp_v1 is not None and timestamp <= _last_timestamp_v1: + timestamp = _last_timestamp_v1 + 1 + _last_timestamp_v1 = timestamp if clock_seq is None: import random clock_seq = random.getrandbits(14) # instead of stable storage @@ -719,6 +719,64 @@ def uuid5(namespace, name): hash = sha1(namespace.bytes + name).digest() return UUID(bytes=hash[:16], version=5) +_last_timestamp_v7 = None +_last_counter_v7 = 0 # 42-bit counter + +def uuid7(): + """Generate a UUID from a Unix timestamp in milliseconds and random bits. + + UUIDv7 objects feature monotonicity within a millisecond. + """ + # --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 - + # unix_ts_ms | version | counter_hi | variant | counter_lo | random + # + # 'counter = counter_hi | counter_lo' is a 42-bit counter constructed + # with Method 1 of RFC 9562, §6.2, and its MSB is set to 0. + # + # 'random' is a 32-bit random value regenerated for every new UUID. + # + # If multiple UUIDs are generated within the same millisecond, the LSB + # of 'counter' is incremented by 1. When overflowing, the timestamp is + # advanced and the counter is reset to a random 42-bit integer with MSB + # set to 0. + + def get_counter_and_tail(): + rand = int.from_bytes(os.urandom(10)) + # 42-bit counter with MSB set to 0 + counter = (rand >> 32) & 0x1ff_ffff_ffff + # 32-bit random data + tail = rand & 0xffff_ffff + return counter, tail + + global _last_timestamp_v7 + global _last_counter_v7 + + import time + nanoseconds = time.time_ns() + timestamp_ms, _ = divmod(nanoseconds, 1_000_000) + + if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: + counter, tail = get_counter_and_tail() + else: + if timestamp_ms < _last_timestamp_v7: + timestamp_ms = _last_timestamp_v7 + 1 + # advance the 42-bit counter + counter = _last_counter_v7 + 1 + if counter > 0x3ff_ffff_ffff: + timestamp_ms += 1 # advance the 48-bit timestamp + counter, tail = get_counter_and_tail() + else: + tail = int.from_bytes(os.urandom(4)) + + _last_timestamp_v7 = timestamp_ms + _last_counter_v7 = counter + + int_uuid_7 = (timestamp_ms & 0xffff_ffff_ffff) << 80 + int_uuid_7 |= ((counter >> 30) & 0xfff) << 64 + int_uuid_7 |= (counter & 0x3fff_ffff) << 32 + int_uuid_7 |= tail & 0xffff_ffff + return UUID(int=int_uuid_7, version=7) + def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. @@ -749,6 +807,7 @@ def main(): "uuid3": uuid3, "uuid4": uuid4, "uuid5": uuid5, + "uuid7": uuid7, "uuid8": uuid8, } uuid_namespace_funcs = ("uuid3", "uuid5") diff --git a/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst new file mode 100644 index 00000000000000..f85e05622623c2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst @@ -0,0 +1,2 @@ +Add :func:`uuid.uuid7` for generating UUIDv7 objects as specified in +:rfc:`9562`. Patch by Bénédikt Tran.