From 42d55b4b0df528761adf43f1cc7b761187901152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 10:15:47 +0200 Subject: [PATCH 01/29] add UUIDv7 implementation --- Lib/uuid.py | 79 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 10 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index c286eac38e1ef4..c1ff9d746b739c 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,8 +1,9 @@ r"""UUID objects (universally unique identifiers) according to RFC 4122. This module provides immutable UUID objects (class UUID) and the functions -uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5 -UUIDs as specified in RFC 4122. +uuid1(), uuid3(), uuid4(), uuid5(), and uuid7() for generating version 1, 3, +4, 5, and 7 UUIDs as specified in RFC 4122 (superseeded by RFC 9562 but still +referred to as RFC 4122 for compatibility purposes). If all you want is a unique ID, you should probably call uuid1() or uuid4(). Note that uuid1() may compromise privacy since it creates a UUID containing @@ -129,7 +130,7 @@ class UUID: variant the UUID variant (one of the constants RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE) - version the UUID version number (1 through 5, meaningful only + version the UUID version number (1, 3, 4, 5 and 7, meaningful only when the variant is RFC_4122) is_safe An enum indicating whether the UUID has been generated in @@ -214,7 +215,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if not 0 <= int < 1<<128: raise ValueError('int is out of range (need a 128-bit value)') if version is not None: - if not 1 <= version <= 5: + if not 1 <= version <= 7: raise ValueError('illegal version number') # Set the variant to RFC 4122. int &= ~(0xc000 << 48) @@ -656,7 +657,7 @@ def getnode(): assert False, '_random_getnode() returned invalid value: {}'.format(_node) -_last_timestamp = None +_last_timestamp_v1 = None def uuid1(node=None, clock_seq=None): """Generate a UUID from a host ID, sequence number, and the current time. @@ -674,15 +675,15 @@ def uuid1(node=None, clock_seq=None): is_safe = SafeUUID.unknown return UUID(bytes=uuid_time, is_safe=is_safe) - global _last_timestamp + global _last_timestamp_v1 import time nanoseconds = time.time_ns() # 0x01b21dd213814000 is the number of 100-ns intervals between the # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00. timestamp = nanoseconds // 100 + 0x01b21dd213814000 - if _last_timestamp is not None and timestamp <= _last_timestamp: - timestamp = _last_timestamp + 1 - _last_timestamp = timestamp + if _last_timestamp_v1 is not None and timestamp <= _last_timestamp_v1: + timestamp = _last_timestamp_v1 + 1 + _last_timestamp_v1 = timestamp if clock_seq is None: import random clock_seq = random.getrandbits(14) # instead of stable storage @@ -719,6 +720,63 @@ def uuid5(namespace, name): hash = sha1(namespace.bytes + name).digest() return UUID(bytes=hash[:16], version=5) +_last_timestamp_v7 = None +_last_counter_v7 = 0 # 42-bit counter + +def uuid7(): + """Generate a UUID from a Unix timestamp in milliseconds and random bits. + + UUIDv7 objects feature monotonicity within a millisecond. + """ + # --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 - + # unix_ts_ms | version | counter_hi | variant | counter_lo | random + # + # 'counter = counter_hi | counter_lo' is a 42-bit counter constructed + # with Method 1 of RFC 9562, §6.2, and its MSB is set to 0. + # + # 'random' is a 32-bit random value regenerated for every new UUID. + # + # If multiple UUIDs are generated within the same millisecond, the LSB + # of 'counter' is incremented by 1. When overflowing, the timestamp is + # advanced and the counter is reset to a random 42-bit integer with MSB + # set to 0. + + def get_counter_and_tail(): + rand = int.from_bytes(os.urandom(10)) + # 42-bit counter with MSB set to 0 + counter = (rand >> 32) & 0x1ffffffffff + # 32-bit random data + tail = rand & 0xffffffff + return counter, tail + + global _last_timestamp_v7 + global _last_counter_v7 + + import time + nanoseconds = time.time_ns() + timestamp_ms, _ = divmod(nanoseconds, 1_000_000) + + if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: + counter, tail = get_counter_and_tail() + else: + if timestamp_ms < _last_timestamp_v7: + timestamp_ms = _last_timestamp_v7 + 1 + # advance the counter + counter = _last_counter_v7 + 1 + if counter > 0x3ffffffffff: + timestamp_ms += 1 # advance the timestamp + counter, tail = get_counter_and_tail() + else: + tail = int.from_bytes(os.urandom(4)) + + _last_timestamp_v7 = timestamp_ms + _last_counter_v7 = counter + + int_uuid_7 = (timestamp_ms & 0xffffffffffff) << 80 + int_uuid_7 |= ((counter >> 30) & 0xfff) << 64 + int_uuid_7 |= (counter & 0x3fffffff) << 32 + int_uuid_7 |= tail & 0xffffffff + return UUID(int=int_uuid_7, version=7) def main(): """Run the uuid command line interface.""" @@ -726,7 +784,8 @@ def main(): "uuid1": uuid1, "uuid3": uuid3, "uuid4": uuid4, - "uuid5": uuid5 + "uuid5": uuid5, + "uuid7": uuid7, } uuid_namespace_funcs = ("uuid3", "uuid5") namespaces = { From 6826fa1b9b61f8bfc1299410a5a594084b63d0d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:25:51 +0200 Subject: [PATCH 02/29] add tests --- Lib/test/test_uuid.py | 177 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 174 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index e177464c00f7a6..d5a601d8b60126 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -1,3 +1,4 @@ +import random import unittest from test import support from test.support import import_helper @@ -267,7 +268,7 @@ def test_exceptions(self): # Version number out of range. badvalue(lambda: self.uuid.UUID('00'*16, version=0)) - badvalue(lambda: self.uuid.UUID('00'*16, version=6)) + badvalue(lambda: self.uuid.UUID('00'*16, version=42)) # Integer value out of range. badvalue(lambda: self.uuid.UUID(int=-1)) @@ -588,7 +589,7 @@ def test_uuid1_bogus_return_value(self): def test_uuid1_time(self): with mock.patch.object(self.uuid, '_generate_time_safe', None), \ - mock.patch.object(self.uuid, '_last_timestamp', None), \ + mock.patch.object(self.uuid, '_last_timestamp_v1', None), \ mock.patch.object(self.uuid, 'getnode', return_value=93328246233727), \ mock.patch('time.time_ns', return_value=1545052026752910643), \ mock.patch('random.getrandbits', return_value=5317): # guaranteed to be random @@ -596,7 +597,7 @@ def test_uuid1_time(self): self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f')) with mock.patch.object(self.uuid, '_generate_time_safe', None), \ - mock.patch.object(self.uuid, '_last_timestamp', None), \ + mock.patch.object(self.uuid, '_last_timestamp_v1', None), \ mock.patch('time.time_ns', return_value=1545052026752910643): u = self.uuid.uuid1(node=93328246233727, clock_seq=5317) self.assertEqual(u, self.uuid.UUID('a7a55b92-01fc-11e9-94c5-54e1acf6da7f')) @@ -681,6 +682,176 @@ def test_uuid5(self): equal(u, self.uuid.UUID(v)) equal(str(u), v) + def test_uuid7(self): + equal = self.assertEqual + u = self.uuid.uuid7() + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + for _ in range(100): + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(30) + counter = (counter_hi << 30) | counter_lo + + tail = random.getrandbits(32) + # effective number of bits is 32 + 30 + 11 = 73 + random_bits = counter << 32 | tail + + # set all remaining MSB of fake random bits to 1 to ensure that + # the implementation correctly remove them + random_bits = (((1 << 7) - 1) << 73) | random_bits + random_data = random_bits.to_bytes(10) + + with ( + mock.patch.object(self.uuid, '_last_timestamp_v7', None), + mock.patch.object(self.uuid, '_last_counter_v7', 0), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u = self.uuid.uuid7() + urand.assert_called_once_with(10) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + + equal(self.uuid._last_timestamp_v7, timestamp_ms) + equal(self.uuid._last_counter_v7, counter) + + unix_ts_ms = timestamp_ms & 0xffffffffffff + equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + + equal((u.int >> 75) & 1, 0) # check that the MSB is 0 + equal((u.int >> 64) & 0xfff, counter_hi) + equal((u.int >> 32) & 0x3fffffff, counter_lo) + equal(u.int & 0xffffffff, tail) + + def test_uuid7_monotonicity(self): + equal = self.assertEqual + + us = [self.uuid.uuid7() for _ in range(10_000)] + equal(us, sorted(us)) + + with mock.patch.multiple(self.uuid, _last_timestamp_v7=0, _last_counter_v7=0): + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(29) # make sure that +1 does not overflow + counter = (counter_hi << 30) | counter_lo + + tail = random.getrandbits(32) + random_bits = counter << 32 | tail + random_data = random_bits.to_bytes(10) + + with ( + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u1 = self.uuid.uuid7() + urand.assert_called_once_with(10) + equal(self.uuid._last_timestamp_v7, timestamp_ms) + equal(self.uuid._last_counter_v7, counter) + equal((u1.int >> 64) & 0xfff, counter_hi) + equal((u1.int >> 32) & 0x3fffffff, counter_lo) + equal(u1.int & 0xffffffff, tail) + + # 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same prec) + next_timestamp_ns = 1672533296_123_457_032 + next_timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + equal(timestamp_ms, next_timestamp_ms) + + next_tail_bytes = os.urandom(4) + next_fail = int.from_bytes(next_tail_bytes) + + with ( + mock.patch('time.time_ns', return_value=next_timestamp_ns), + mock.patch('os.urandom', return_value=next_tail_bytes) as urand + ): + u2 = self.uuid.uuid7() + urand.assert_called_once_with(4) + # same milli-second + equal(self.uuid._last_timestamp_v7, timestamp_ms) + # counter advanced by 1 + equal(self.uuid._last_counter_v7, counter + 1) + equal((u2.int >> 64) & 0xfff, counter_hi) + equal((u2.int >> 32) & 0x3fffffff, counter_lo + 1) + equal(u2.int & 0xffffffff, next_fail) + + self.assertLess(u1, u2) + + def test_uuid7_timestamp_backwards(self): + equal = self.assertEqual + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + fake_last_timestamp_v7 = timestamp_ms + 1 + + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(29) # make sure that +1 does not overflow + counter = (counter_hi << 30) | counter_lo + + tail_bytes = os.urandom(4) + tail = int.from_bytes(tail_bytes) + + with ( + mock.patch.object(self.uuid, '_last_timestamp_v7', fake_last_timestamp_v7), + mock.patch.object(self.uuid, '_last_counter_v7', counter), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=tail_bytes) as os_urandom_fake + ): + u = self.uuid.uuid7() + os_urandom_fake.assert_called_once_with(4) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1) + unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffffffffffff + equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + # counter advanced by 1 + equal(self.uuid._last_counter_v7, counter + 1) + equal((u.int >> 64) & 0xfff, counter_hi) + # counter advanced by 1 (constructed so that counter_hi is unchanged) + equal((u.int >> 32) & 0x3fffffff, counter_lo + 1) + equal(u.int & 0xffffffff, tail) + + def test_uuid7_overflow_counter(self): + equal = self.assertEqual + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + new_counter_hi = random.getrandbits(11) + new_counter_lo = random.getrandbits(30) + new_counter = (new_counter_hi << 30) | new_counter_lo + + tail = random.getrandbits(32) + random_bits = new_counter << 32 | tail + random_data = random_bits.to_bytes(10) + + with ( + mock.patch.object(self.uuid, '_last_timestamp_v7', timestamp_ms), + # same timestamp, but force an overflow on the counter + mock.patch.object(self.uuid, '_last_counter_v7', 0x3ffffffffff), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u = self.uuid.uuid7() + urand.assert_called_with(10) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + # timestamp advanced due to overflow + equal(self.uuid._last_timestamp_v7, timestamp_ms + 1) + unix_ts_ms = (timestamp_ms + 1) & 0xffffffffffff + equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + # counter overflow, so we picked a new one + equal(self.uuid._last_counter_v7, new_counter) + equal((u.int >> 64) & 0xfff, new_counter_hi) + equal((u.int >> 32) & 0x3fffffff, new_counter_lo) + equal(u.int & 0xffffffff, tail) + @support.requires_fork() def testIssue8621(self): # On at least some versions of OSX self.uuid.uuid4 generates From edc2caba5949a1018f606e2ea3921a79aadb895b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:27:29 +0200 Subject: [PATCH 03/29] blurb --- .../next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst diff --git a/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst new file mode 100644 index 00000000000000..f85e05622623c2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst @@ -0,0 +1,2 @@ +Add :func:`uuid.uuid7` for generating UUIDv7 objects as specified in +:rfc:`9562`. Patch by Bénédikt Tran. From c6d26b63610aaa1d77fa7b141fe43e08f57532e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:28:36 +0200 Subject: [PATCH 04/29] update CHANGELOG --- Doc/whatsnew/3.14.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 9662044915b8ca..1e56fd076ba8cf 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -121,6 +121,14 @@ symtable (Contributed by Bénédikt Tran in :gh:`120029`.) +uuid +---- + +* Add support for UUID version 7 via :func:`uuid.uuid7` as specified + in :rfc:`9562`. + + (Contributed by Bénédikt Tran in :gh:`89083`.) + Optimizations ============= From 2ddb4b8fcd9b0d622b2cd1b65b1580c547600ce0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:32:38 +0200 Subject: [PATCH 05/29] update RFC number --- Doc/library/uuid.rst | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 0f2d7820cb25c8..2cd3c842d5c5f8 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -1,8 +1,8 @@ -:mod:`!uuid` --- UUID objects according to :rfc:`4122` +:mod:`!uuid` --- UUID objects according to :rfc:`9562` ====================================================== .. module:: uuid - :synopsis: UUID objects (universally unique identifiers) according to RFC 4122 + :synopsis: UUID objects (universally unique identifiers) according to RFC 9562 .. moduleauthor:: Ka-Ping Yee .. sectionauthor:: George Yoshida @@ -12,7 +12,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, and 5 UUIDs as specified in :rfc:`4122`. +generating version 1, 3, 4, and 5 UUIDs as specified in :rfc:`9562`. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -65,7 +65,7 @@ which relays any information about the UUID's safety, using this enumeration: Exactly one of *hex*, *bytes*, *bytes_le*, *fields*, or *int* must be given. The *version* argument is optional; if given, the resulting UUID will have its - variant and version number set according to :rfc:`4122`, overriding bits in the + variant and version number set according to :rfc:`9562`, overriding bits in the given *hex*, *bytes*, *bytes_le*, *fields*, or *int*. Comparison of UUID objects are made by way of comparing their @@ -137,7 +137,7 @@ which relays any information about the UUID's safety, using this enumeration: .. attribute:: UUID.urn - The UUID as a URN as specified in :rfc:`4122`. + The UUID as a URN as specified in :rfc:`9562`. .. attribute:: UUID.variant @@ -168,7 +168,7 @@ The :mod:`uuid` module defines the following functions: runs, it may launch a separate program, which could be quite slow. If all attempts to obtain the hardware address fail, we choose a random 48-bit number with the multicast bit (least significant bit of the first octet) - set to 1 as recommended in :rfc:`4122`. "Hardware address" means the MAC + set to 1 as recommended in :rfc:`9562`. "Hardware address" means the MAC address of a network interface. On a machine with multiple network interfaces, universally administered MAC addresses (i.e. where the second least significant bit of the first octet is *unset*) will be preferred over @@ -252,7 +252,12 @@ of the :attr:`~UUID.variant` attribute: .. data:: RFC_4122 - Specifies the UUID layout given in :rfc:`4122`. + Specifies the UUID layout given in :rfc:`9562`. + + .. note:: + + For compatibility reasons, the content of the :data:`!RFC_4122` constant + is not updated to reflect the new RFC number. .. data:: RESERVED_MICROSOFT @@ -267,7 +272,7 @@ of the :attr:`~UUID.variant` attribute: .. seealso:: - :rfc:`4122` - A Universally Unique IDentifier (UUID) URN Namespace + :rfc:`9562` - A Universally Unique IDentifier (UUID) URN Namespace This specification defines a Uniform Resource Name namespace for UUIDs, the internal format of UUIDs, and methods of generating UUIDs. From bcd1417e8c8a1d23091930d6e5ca3190873d7191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:36:56 +0200 Subject: [PATCH 06/29] add TODO in the docs --- Doc/library/uuid.rst | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 2cd3c842d5c5f8..9d71657b7670ee 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -12,7 +12,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, and 5 UUIDs as specified in :rfc:`9562`. +generating version 1, 3, 4, 5, and 7 UUIDs as specified in :rfc:`9562`. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -149,7 +149,7 @@ which relays any information about the UUID's safety, using this enumeration: .. attribute:: UUID.version - The UUID version number (1 through 5, meaningful only when the variant is + The UUID version number (1 through 7, meaningful only when the variant is :const:`RFC_4122`). .. attribute:: UUID.is_safe @@ -216,6 +216,14 @@ The :mod:`uuid` module defines the following functions: .. index:: single: uuid5 + +.. function:: uuid7() + + TODO + +.. index:: single: uuid7 + + The :mod:`uuid` module defines the following namespace identifiers for use with :func:`uuid3` or :func:`uuid5`. From c3d474519e63ad34777e3f3f26b9793bc0fececf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:18:57 +0200 Subject: [PATCH 07/29] add UUIDv8 implementation --- Lib/uuid.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 4d4f06cfc9ebbe..2ff64fc39fb83e 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,8 +1,9 @@ r"""UUID objects (universally unique identifiers) according to RFC 4122. This module provides immutable UUID objects (class UUID) and the functions -uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5 -UUIDs as specified in RFC 4122. +uuid1(), uuid3(), uuid4(), uuid5(), and uuid8() for generating version 1, 3, +4, 5, and 8 UUIDs as specified in RFC 4122 (superseeded by RFC 9562 but still +referred to as RFC 4122 for compatibility purposes). If all you want is a unique ID, you should probably call uuid1() or uuid4(). Note that uuid1() may compromise privacy since it creates a UUID containing @@ -129,7 +130,7 @@ class UUID: variant the UUID variant (one of the constants RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE) - version the UUID version number (1 through 5, meaningful only + version the UUID version number (1 through 8, meaningful only when the variant is RFC_4122) is_safe An enum indicating whether the UUID has been generated in @@ -214,7 +215,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if not 0 <= int < 1<<128: raise ValueError('int is out of range (need a 128-bit value)') if version is not None: - if not 1 <= version <= 5: + if not 1 <= version <= 8: raise ValueError('illegal version number') # Set the variant to RFC 4122. int &= ~(0xc000 << 48) @@ -719,6 +720,27 @@ def uuid5(namespace, name): hash = sha1(namespace.bytes + name).digest() return UUID(bytes=hash[:16], version=5) +def uuid8(a=None, b=None, c=None): + """Generate a UUID from three custom blocks. + 'a' is the first 48-bit chunk of the UUID (octets 0-5); + 'b' is the mid 12-bit chunk (octets 6-7); + 'c' is the last 62-bit chunk (octets 8-15). + When a value is not specified, a random value is generated. + """ + if a is None: + import random + a = random.getrandbits(48) + if b is None: + import random + b = random.getrandbits(12) + if c is None: + import random + c = random.getrandbits(62) + + int_uuid_8 = (a & 0xffffffffffff) << 80 + int_uuid_8 |= (b & 0xfff) << 64 + int_uuid_8 |= c & 0x3fffffffffffffff + return UUID(int=int_uuid_8, version=8) def main(): """Run the uuid command line interface.""" @@ -726,7 +748,8 @@ def main(): "uuid1": uuid1, "uuid3": uuid3, "uuid4": uuid4, - "uuid5": uuid5 + "uuid5": uuid5, + "uuid8": uuid8, } uuid_namespace_funcs = ("uuid3", "uuid5") namespaces = { From 392d289f549d6412dd9e9cef009edc37fcd4f334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:19:01 +0200 Subject: [PATCH 08/29] add tests --- Lib/test/test_uuid.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index e177464c00f7a6..f89f14b82a0fe9 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -8,8 +8,10 @@ import io import os import pickle +import random import sys import weakref +from itertools import product from unittest import mock py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid']) @@ -267,7 +269,7 @@ def test_exceptions(self): # Version number out of range. badvalue(lambda: self.uuid.UUID('00'*16, version=0)) - badvalue(lambda: self.uuid.UUID('00'*16, version=6)) + badvalue(lambda: self.uuid.UUID('00'*16, version=42)) # Integer value out of range. badvalue(lambda: self.uuid.UUID(int=-1)) @@ -681,6 +683,29 @@ def test_uuid5(self): equal(u, self.uuid.UUID(v)) equal(str(u), v) + def test_uuid8(self): + equal = self.assertEqual + u = self.uuid.uuid8() + + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 8) + + for (_, hi, mid, lo) in product( + range(10), # repeat 10 times + [None, 0, random.getrandbits(48)], + [None, 0, random.getrandbits(12)], + [None, 0, random.getrandbits(62)], + ): + u = self.uuid.uuid8(hi, mid, lo) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 8) + if hi is not None: + equal((u.int >> 80) & 0xffffffffffff, hi) + if mid is not None: + equal((u.int >> 64) & 0xfff, mid) + if lo is not None: + equal(u.int & 0x3fffffffffffffff, lo) + @support.requires_fork() def testIssue8621(self): # On at least some versions of OSX self.uuid.uuid4 generates From 26889ea442cf421ac383cb970ff88c8b3a566e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:19:10 +0200 Subject: [PATCH 09/29] blurb --- .../next/Library/2024-08-22-12-12-35.gh-issue-89083.b6zFh0.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-08-22-12-12-35.gh-issue-89083.b6zFh0.rst diff --git a/Misc/NEWS.d/next/Library/2024-08-22-12-12-35.gh-issue-89083.b6zFh0.rst b/Misc/NEWS.d/next/Library/2024-08-22-12-12-35.gh-issue-89083.b6zFh0.rst new file mode 100644 index 00000000000000..d37d585d51b490 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-22-12-12-35.gh-issue-89083.b6zFh0.rst @@ -0,0 +1,2 @@ +Add :func:`uuid.uuid8` for generating UUIDv8 objects as specified in +:rfc:`9562`. Patch by Bénédikt Tran From 44b66e6c82a4d1aefdcf1a6cbb3ffe02d53596d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:19:17 +0200 Subject: [PATCH 10/29] add What's New entry --- Doc/whatsnew/3.14.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index a34dc639ad2a94..7730dc528c59d3 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -199,6 +199,14 @@ symtable (Contributed by Bénédikt Tran in :gh:`120029`.) +uuid +---- + +* Add support for UUID version 8 via :func:`uuid.uuid8` as specified + in :rfc:`9562`. + + (Contributed by Bénédikt Tran in :gh:`89083`.) + .. Add improved modules above alphabetically, not here at the end. Optimizations From 7be6dc4b402b3d0e68b3ba3eb44247e9aca2d216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:19:19 +0200 Subject: [PATCH 11/29] add docs --- Doc/library/uuid.rst | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 0f2d7820cb25c8..f4b1a1e734ebc5 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -12,7 +12,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, and 5 UUIDs as specified in :rfc:`4122`. +generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`4122`. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -149,9 +149,13 @@ which relays any information about the UUID's safety, using this enumeration: .. attribute:: UUID.version - The UUID version number (1 through 5, meaningful only when the variant is + The UUID version number (1 through 8, meaningful only when the variant is :const:`RFC_4122`). + .. versionchanged:: 3.14 + Added UUID version 8. + + .. attribute:: UUID.is_safe An enumeration of :class:`SafeUUID` which indicates whether the platform @@ -216,6 +220,16 @@ The :mod:`uuid` module defines the following functions: .. index:: single: uuid5 + +.. function:: uuid8(a=None, b=None, c=None) + + TODO + + .. versionadded:: 3.14 + +.. index:: single: uuid8 + + The :mod:`uuid` module defines the following namespace identifiers for use with :func:`uuid3` or :func:`uuid5`. From 8ba3d8b7d781e6a6e74f2b7563044bab7e46d90b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 12:58:38 +0200 Subject: [PATCH 12/29] Improve hexadecimal masks reading --- Lib/uuid.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 2ff64fc39fb83e..fac7e32deb6275 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -722,10 +722,12 @@ def uuid5(namespace, name): def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. - 'a' is the first 48-bit chunk of the UUID (octets 0-5); - 'b' is the mid 12-bit chunk (octets 6-7); - 'c' is the last 62-bit chunk (octets 8-15). - When a value is not specified, a random value is generated. + + * 'a' is the first 48-bit chunk of the UUID (octets 0-5); + * 'b' is the mid 12-bit chunk (octets 6-7); + * 'c' is the last 62-bit chunk (octets 8-15). + + When a value is not specified, a pseudo-random value is generated. """ if a is None: import random @@ -736,10 +738,9 @@ def uuid8(a=None, b=None, c=None): if c is None: import random c = random.getrandbits(62) - - int_uuid_8 = (a & 0xffffffffffff) << 80 + int_uuid_8 = (a & 0xffff_ffff_ffff) << 80 int_uuid_8 |= (b & 0xfff) << 64 - int_uuid_8 |= c & 0x3fffffffffffffff + int_uuid_8 |= c & 0x3fff_ffff_ffff_ffff return UUID(int=int_uuid_8, version=8) def main(): From a14ae9bf5e51ef0bd3bc1bfd068fab5921181e1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 13:19:49 +0200 Subject: [PATCH 13/29] add uniqueness test --- Lib/test/test_uuid.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index f89f14b82a0fe9..39f65e2847e0ec 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -706,6 +706,14 @@ def test_uuid8(self): if lo is not None: equal(u.int & 0x3fffffffffffffff, lo) + def test_uuid8_uniqueness(self): + """Test that UUIDv8-generated values are unique (up to a negligible + probability of failure).""" + u1 = self.uuid.uuid8() + u2 = self.uuid.uuid8() + self.assertNotEqual(u1.int, u2.int) + self.assertEqual(u1.version, u2.version) + @support.requires_fork() def testIssue8621(self): # On at least some versions of OSX self.uuid.uuid4 generates From 7a169c96dc1c3a16d66d2856a144bacc1c1ebf0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 13:27:47 +0200 Subject: [PATCH 14/29] Update mentions to RFC 4122 to RFC 4122/9562 when possible. --- Doc/library/uuid.rst | 18 ++++++++++-------- Lib/uuid.py | 11 +++++------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index f4b1a1e734ebc5..111a313bf3bdec 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -1,8 +1,8 @@ -:mod:`!uuid` --- UUID objects according to :rfc:`4122` +:mod:`!uuid` --- UUID objects according to :rfc:`9562` ====================================================== .. module:: uuid - :synopsis: UUID objects (universally unique identifiers) according to RFC 4122 + :synopsis: UUID objects (universally unique identifiers) according to RFC 9562 .. moduleauthor:: Ka-Ping Yee .. sectionauthor:: George Yoshida @@ -12,7 +12,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`4122`. +generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`9562`. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -65,7 +65,7 @@ which relays any information about the UUID's safety, using this enumeration: Exactly one of *hex*, *bytes*, *bytes_le*, *fields*, or *int* must be given. The *version* argument is optional; if given, the resulting UUID will have its - variant and version number set according to :rfc:`4122`, overriding bits in the + variant and version number set according to :rfc:`9562`, overriding bits in the given *hex*, *bytes*, *bytes_le*, *fields*, or *int*. Comparison of UUID objects are made by way of comparing their @@ -137,7 +137,7 @@ which relays any information about the UUID's safety, using this enumeration: .. attribute:: UUID.urn - The UUID as a URN as specified in :rfc:`4122`. + The UUID as a URN as specified in :rfc:`9562`. .. attribute:: UUID.variant @@ -172,7 +172,7 @@ The :mod:`uuid` module defines the following functions: runs, it may launch a separate program, which could be quite slow. If all attempts to obtain the hardware address fail, we choose a random 48-bit number with the multicast bit (least significant bit of the first octet) - set to 1 as recommended in :rfc:`4122`. "Hardware address" means the MAC + set to 1 as recommended in :rfc:`9562`. "Hardware address" means the MAC address of a network interface. On a machine with multiple network interfaces, universally administered MAC addresses (i.e. where the second least significant bit of the first octet is *unset*) will be preferred over @@ -266,7 +266,9 @@ of the :attr:`~UUID.variant` attribute: .. data:: RFC_4122 - Specifies the UUID layout given in :rfc:`4122`. + Specifies the UUID layout given in :rfc:`4122`. This constant is kept + for backward compatibility even though :rfc:`4122` has been superseeded + by :rfc:`9562`. .. data:: RESERVED_MICROSOFT @@ -281,7 +283,7 @@ of the :attr:`~UUID.variant` attribute: .. seealso:: - :rfc:`4122` - A Universally Unique IDentifier (UUID) URN Namespace + :rfc:`9562` - A Universally Unique IDentifier (UUID) URN Namespace This specification defines a Uniform Resource Name namespace for UUIDs, the internal format of UUIDs, and methods of generating UUIDs. diff --git a/Lib/uuid.py b/Lib/uuid.py index fac7e32deb6275..9c6ad9643cf6d5 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,9 +1,8 @@ -r"""UUID objects (universally unique identifiers) according to RFC 4122. +r"""UUID objects (universally unique identifiers) according to RFC 4122/9562. This module provides immutable UUID objects (class UUID) and the functions uuid1(), uuid3(), uuid4(), uuid5(), and uuid8() for generating version 1, 3, -4, 5, and 8 UUIDs as specified in RFC 4122 (superseeded by RFC 9562 but still -referred to as RFC 4122 for compatibility purposes). +4, 5, and 8 UUIDs as specified in RFC 4122/9562. If all you want is a unique ID, you should probably call uuid1() or uuid4(). Note that uuid1() may compromise privacy since it creates a UUID containing @@ -125,7 +124,7 @@ class UUID: int the UUID as a 128-bit integer - urn the UUID as a URN as specified in RFC 4122 + urn the UUID as a URN as specified in RFC 4122/9562 variant the UUID variant (one of the constants RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE) @@ -217,7 +216,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if version is not None: if not 1 <= version <= 8: raise ValueError('illegal version number') - # Set the variant to RFC 4122. + # Set the variant to RFC 4122/9562. int &= ~(0xc000 << 48) int |= 0x8000 << 48 # Set the version number. @@ -356,7 +355,7 @@ def variant(self): @property def version(self): - # The version bits are only meaningful for RFC 4122 UUIDs. + # The version bits are only meaningful for RFC 4122/9562 UUIDs. if self.variant == RFC_4122: return int((self.int >> 76) & 0xf) From b082c9085dd65051add3536ffc3abcd82ec9717f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Sep 2024 13:41:15 +0200 Subject: [PATCH 15/29] Update docs --- Doc/library/uuid.rst | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 111a313bf3bdec..11b15294535f50 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -12,7 +12,8 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`9562`. +generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`9562` (which +superseeds :rfc:`4122`). If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -172,7 +173,7 @@ The :mod:`uuid` module defines the following functions: runs, it may launch a separate program, which could be quite slow. If all attempts to obtain the hardware address fail, we choose a random 48-bit number with the multicast bit (least significant bit of the first octet) - set to 1 as recommended in :rfc:`9562`. "Hardware address" means the MAC + set to 1 as recommended in :rfc:`4122`. "Hardware address" means the MAC address of a network interface. On a machine with multiple network interfaces, universally administered MAC addresses (i.e. where the second least significant bit of the first octet is *unset*) will be preferred over @@ -223,7 +224,14 @@ The :mod:`uuid` module defines the following functions: .. function:: uuid8(a=None, b=None, c=None) - TODO + Generate a pseudo-random UUID according to + :rfc:`RFC 9562, §5.8 <9562#section-5.8>`. + + When specified, the parameters *a*, *b* and *c* are expected to be + positive integers of 48, 12 and 62 bits respectively. If they exceed + their expected bit count, only their least significant bits are kept; + non-specified arguments are substituted for a pseudo-random integer of + appropriate size. .. versionadded:: 3.14 @@ -299,7 +307,7 @@ The :mod:`uuid` module can be executed as a script from the command line. .. code-block:: sh - python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5}] [-n NAMESPACE] [-N NAME] + python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid8}] [-n NAMESPACE] [-N NAME] The following options are accepted: From 5e97cc32343023f6c415ee042d00d3eeaef4913d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 11 Nov 2024 16:44:00 +0100 Subject: [PATCH 16/29] Apply suggestions from code review Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/uuid.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 11b15294535f50..658e8491f56bf1 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -13,7 +13,7 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`9562` (which -superseeds :rfc:`4122`). +supersedes :rfc:`4122`). If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -275,7 +275,7 @@ of the :attr:`~UUID.variant` attribute: .. data:: RFC_4122 Specifies the UUID layout given in :rfc:`4122`. This constant is kept - for backward compatibility even though :rfc:`4122` has been superseeded + for backward compatibility even though :rfc:`4122` has been superseded by :rfc:`9562`. From 051f34e734bbbfa41563dce8129eb31d9ada329e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 11 Nov 2024 16:44:51 +0100 Subject: [PATCH 17/29] Update Lib/test/test_uuid.py --- Lib/test/test_uuid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 39f65e2847e0ec..7bd26a8ca34b62 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -707,8 +707,8 @@ def test_uuid8(self): equal(u.int & 0x3fffffffffffffff, lo) def test_uuid8_uniqueness(self): - """Test that UUIDv8-generated values are unique (up to a negligible - probability of failure).""" + # Test that UUIDv8-generated values are unique + # (up to a negligible probability of failure). u1 = self.uuid.uuid8() u2 = self.uuid.uuid8() self.assertNotEqual(u1.int, u2.int) From bdf9a77e7eebf4d33d7bd9d9480c9784907fcff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 11 Nov 2024 16:46:32 +0100 Subject: [PATCH 18/29] Apply suggestions from code review --- Doc/library/uuid.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 658e8491f56bf1..6166c22caedf81 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -153,7 +153,7 @@ which relays any information about the UUID's safety, using this enumeration: The UUID version number (1 through 8, meaningful only when the variant is :const:`RFC_4122`). - .. versionchanged:: 3.14 + .. versionchanged:: next Added UUID version 8. @@ -233,7 +233,7 @@ The :mod:`uuid` module defines the following functions: non-specified arguments are substituted for a pseudo-random integer of appropriate size. - .. versionadded:: 3.14 + .. versionadded:: next .. index:: single: uuid8 From 2e390727c00da45cdc102814d805c88c17877dfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 09:56:33 +0100 Subject: [PATCH 19/29] update CLI --- Doc/library/uuid.rst | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 97896f0ed8ea52..33dbca5231ac9e 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -12,12 +12,8 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -<<<<<<< HEAD -generating version 1, 3, 4, 5, and 7 UUIDs as specified in :rfc:`9562`. -======= -generating version 1, 3, 4, 5, and 8 UUIDs as specified in :rfc:`9562` (which -supersedes :rfc:`4122`). ->>>>>>> origin/uuid-v8-89083 +generating version 1, 3, 4, 5, 7, and 8 UUIDs as specified in :rfc:`9562` +(which supersedes :rfc:`4122`). If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -154,13 +150,12 @@ which relays any information about the UUID's safety, using this enumeration: .. attribute:: UUID.version -<<<<<<< HEAD - The UUID version number (1 through 7, meaningful only when the variant is -======= The UUID version number (1 through 8, meaningful only when the variant is ->>>>>>> origin/uuid-v8-89083 :const:`RFC_4122`). + .. versionchanged:: next + Added UUID version 7. + .. versionchanged:: next Added UUID version 8. @@ -322,7 +317,7 @@ The :mod:`uuid` module can be executed as a script from the command line. .. code-block:: sh - python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid8}] [-n NAMESPACE] [-N NAME] + python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid7,uuid8}] [-n NAMESPACE] [-N NAME] The following options are accepted: @@ -338,6 +333,9 @@ The following options are accepted: Specify the function name to use to generate the uuid. By default :func:`uuid4` is used. + .. versionchanged:: next + Expose UUID version 7 via ``uuid7``. + .. option:: -n --namespace From 694e07fdc59e84dec4768721974cf52bb820b389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:02:44 +0100 Subject: [PATCH 20/29] post-merge --- Doc/library/uuid.rst | 13 ++++++++----- Doc/whatsnew/3.14.rst | 1 - Lib/uuid.py | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 33dbca5231ac9e..2ace429bab8533 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -11,9 +11,9 @@ -------------- This module provides immutable :class:`UUID` objects (the :class:`UUID` class) -and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for -generating version 1, 3, 4, 5, 7, and 8 UUIDs as specified in :rfc:`9562` -(which supersedes :rfc:`4122`). +and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5`, +:func:`uuid7`, and :func:`uuid8` for generating version 1, 3, 4, 5, 7, and 8 +UUIDS as specified in :rfc:`9562` (which supersedes :rfc:`4122`). If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -176,7 +176,7 @@ The :mod:`uuid` module defines the following functions: runs, it may launch a separate program, which could be quite slow. If all attempts to obtain the hardware address fail, we choose a random 48-bit number with the multicast bit (least significant bit of the first octet) - set to 1 as recommended in :rfc:`9562`. "Hardware address" means the MAC + set to 1 as recommended in :rfc:`4122`. "Hardware address" means the MAC address of a network interface. On a machine with multiple network interfaces, universally administered MAC addresses (i.e. where the second least significant bit of the first octet is *unset*) will be preferred over @@ -334,7 +334,10 @@ The following options are accepted: is used. .. versionchanged:: next - Expose UUID version 7 via ``uuid7``. + Allow generating UUID version 7. + + .. versionchanged:: next + Allow generating UUID version 8. .. option:: -n --namespace diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 7253e65267a6ff..a5251c1acd0bf9 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -522,7 +522,6 @@ uuid * Add support for UUID version 7 via :func:`uuid.uuid7` as specified in :rfc:`9562`. - (Contributed by Bénédikt Tran in :gh:`89083`.) * Add support for UUID version 8 via :func:`uuid.uuid8` as specified diff --git a/Lib/uuid.py b/Lib/uuid.py index e0bc394b00ce6a..b36a29dd75c2e3 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,8 +1,8 @@ r"""UUID objects (universally unique identifiers) according to RFC 4122/9562. This module provides immutable UUID objects (class UUID) and the functions -uuid1(), uuid3(), uuid4(), uuid5(), and uuid7() for generating version 1, 3, -4, 5, 7, and 8 UUIDs as specified in RFC 4122/9562. +uuid1(), uuid3(), uuid4(), uuid5(), uuid7(), and uuid8() for generating +version 1, 3, 4, 5, 7, and 8 UUIDs as specified in RFC 4122/9562. If all you want is a unique ID, you should probably call uuid1() or uuid4(). Note that uuid1() may compromise privacy since it creates a UUID containing From 7ff4368afb66179a5195c5b7eb7130148540cc8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:13:41 +0100 Subject: [PATCH 21/29] improve readability --- Lib/uuid.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index b36a29dd75c2e3..e8a8c51cb06e6c 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -743,9 +743,9 @@ def uuid7(): def get_counter_and_tail(): rand = int.from_bytes(os.urandom(10)) # 42-bit counter with MSB set to 0 - counter = (rand >> 32) & 0x1ffffffffff + counter = (rand >> 32) & 0x1ff_ffff_ffff # 32-bit random data - tail = rand & 0xffffffff + tail = rand & 0xffff_ffff return counter, tail global _last_timestamp_v7 @@ -762,7 +762,7 @@ def get_counter_and_tail(): timestamp_ms = _last_timestamp_v7 + 1 # advance the counter counter = _last_counter_v7 + 1 - if counter > 0x3ffffffffff: + if counter > 0x3fff_ffff: timestamp_ms += 1 # advance the timestamp counter, tail = get_counter_and_tail() else: @@ -771,10 +771,10 @@ def get_counter_and_tail(): _last_timestamp_v7 = timestamp_ms _last_counter_v7 = counter - int_uuid_7 = (timestamp_ms & 0xffffffffffff) << 80 + int_uuid_7 = (timestamp_ms & 0xffff_ffff_ffff) << 80 int_uuid_7 |= ((counter >> 30) & 0xfff) << 64 - int_uuid_7 |= (counter & 0x3fffffff) << 32 - int_uuid_7 |= tail & 0xffffffff + int_uuid_7 |= (counter & 0x3fff_ffff) << 32 + int_uuid_7 |= tail & 0xffff_ffff return UUID(int=int_uuid_7, version=7) def uuid8(a=None, b=None, c=None): From 7c3cab646acf1fd7842a9ee81f7b4feb9de77a25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:33:45 +0100 Subject: [PATCH 22/29] post-merge --- Lib/uuid.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index e8a8c51cb06e6c..f0d9b39c43b7ba 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -760,10 +760,10 @@ def get_counter_and_tail(): else: if timestamp_ms < _last_timestamp_v7: timestamp_ms = _last_timestamp_v7 + 1 - # advance the counter + # advance the 42-bit counter counter = _last_counter_v7 + 1 - if counter > 0x3fff_ffff: - timestamp_ms += 1 # advance the timestamp + if counter > 0x3ff_ffff_ffff: + timestamp_ms += 1 # advance the 48-bit timestamp counter, tail = get_counter_and_tail() else: tail = int.from_bytes(os.urandom(4)) From e75874122f5ea2f86e071df7eb4348f84cad4a13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:33:53 +0100 Subject: [PATCH 23/29] uniqueness test --- Lib/test/test_uuid.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index ad157fa720e358..8f4fb080fbed68 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -730,6 +730,18 @@ def test_uuid7(self): equal((u.int >> 32) & 0x3fffffff, counter_lo) equal(u.int & 0xffffffff, tail) + def test_uuid7_uniqueness(self): + # Test that UUIDv7-generated values are unique. + # + # While UUIDv8 has an entropy of 122 bits, those 122 bits may not + # necessarily be sampled from a PRNG. On the other hand, UUIDv7 + # uses os.urandom() as a PRNG which features better randomness. + # + # Until reaching UNIX_EPOCH + 10'000 years, the probability for + # generating two identical UUIDs is negligilbe. + uuids = {self.uuid.uuid7() for _ in range(1000)} + self.assertEqual(len(uuids), 1000) + def test_uuid7_monotonicity(self): equal = self.assertEqual From c18d0c4629d7661795d2e0ac1b5a74e519dfa6b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:42:10 +0100 Subject: [PATCH 24/29] improve test comments --- Lib/test/test_uuid.py | 83 +++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 8f4fb080fbed68..0997e16f0ba41f 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -709,8 +709,11 @@ def test_uuid7(self): random_data = random_bits.to_bytes(10) with ( - mock.patch.object(self.uuid, '_last_timestamp_v7', None), - mock.patch.object(self.uuid, '_last_counter_v7', 0), + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=None, + _last_counter_v7=0, + ), mock.patch('time.time_ns', return_value=timestamp_ns), mock.patch('os.urandom', return_value=random_data) as urand ): @@ -722,13 +725,13 @@ def test_uuid7(self): equal(self.uuid._last_timestamp_v7, timestamp_ms) equal(self.uuid._last_counter_v7, counter) - unix_ts_ms = timestamp_ms & 0xffffffffffff - equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) + unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) - equal((u.int >> 75) & 1, 0) # check that the MSB is 0 + equal((u.int >> 75) & 1, 0) # check that the MSB is 0 equal((u.int >> 64) & 0xfff, counter_hi) - equal((u.int >> 32) & 0x3fffffff, counter_lo) - equal(u.int & 0xffffffff, tail) + equal((u.int >> 32) & 0x3fff_ffff, counter_lo) + equal(u.int & 0xffff_ffff, tail) def test_uuid7_uniqueness(self): # Test that UUIDv7-generated values are unique. @@ -748,13 +751,18 @@ def test_uuid7_monotonicity(self): us = [self.uuid.uuid7() for _ in range(10_000)] equal(us, sorted(us)) - with mock.patch.multiple(self.uuid, _last_timestamp_v7=0, _last_counter_v7=0): + with mock.patch.multiple( + self.uuid, + _last_timestamp_v7=0, + _last_counter_v7=0 + ): # 1 Jan 2023 12:34:56.123_456_789 timestamp_ns = 1672533296_123_456_789 # ns precision timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + # counter_{hi,lo} are chosen so that "counter + 1" does not overflow counter_hi = random.getrandbits(11) - counter_lo = random.getrandbits(29) # make sure that +1 does not overflow + counter_lo = random.getrandbits(29) counter = (counter_hi << 30) | counter_lo tail = random.getrandbits(32) @@ -770,10 +778,10 @@ def test_uuid7_monotonicity(self): equal(self.uuid._last_timestamp_v7, timestamp_ms) equal(self.uuid._last_counter_v7, counter) equal((u1.int >> 64) & 0xfff, counter_hi) - equal((u1.int >> 32) & 0x3fffffff, counter_lo) - equal(u1.int & 0xffffffff, tail) + equal((u1.int >> 32) & 0x3fff_ffff, counter_lo) + equal(u1.int & 0xffff_ffff, tail) - # 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same prec) + # 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same ns) next_timestamp_ns = 1672533296_123_457_032 next_timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) equal(timestamp_ms, next_timestamp_ms) @@ -789,11 +797,11 @@ def test_uuid7_monotonicity(self): urand.assert_called_once_with(4) # same milli-second equal(self.uuid._last_timestamp_v7, timestamp_ms) - # counter advanced by 1 + # 42-bit counter advanced by 1 equal(self.uuid._last_counter_v7, counter + 1) equal((u2.int >> 64) & 0xfff, counter_hi) - equal((u2.int >> 32) & 0x3fffffff, counter_lo + 1) - equal(u2.int & 0xffffffff, next_fail) + equal((u2.int >> 32) & 0x3fff_ffff, counter_lo + 1) + equal(u2.int & 0xffff_ffff, next_fail) self.assertLess(u1, u2) @@ -804,32 +812,36 @@ def test_uuid7_timestamp_backwards(self): timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) fake_last_timestamp_v7 = timestamp_ms + 1 + # counter_{hi,lo} are chosen so that "counter + 1" does not overflow counter_hi = random.getrandbits(11) - counter_lo = random.getrandbits(29) # make sure that +1 does not overflow + counter_lo = random.getrandbits(29) counter = (counter_hi << 30) | counter_lo tail_bytes = os.urandom(4) tail = int.from_bytes(tail_bytes) with ( - mock.patch.object(self.uuid, '_last_timestamp_v7', fake_last_timestamp_v7), - mock.patch.object(self.uuid, '_last_counter_v7', counter), + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=fake_last_timestamp_v7, + _last_counter_v7=counter, + ), mock.patch('time.time_ns', return_value=timestamp_ns), - mock.patch('os.urandom', return_value=tail_bytes) as os_urandom_fake + mock.patch('os.urandom', return_value=tail_bytes) as urand ): u = self.uuid.uuid7() - os_urandom_fake.assert_called_once_with(4) + urand.assert_called_once_with(4) equal(u.variant, self.uuid.RFC_4122) equal(u.version, 7) equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1) - unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffffffffffff - equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) - # counter advanced by 1 + unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) + # 42-bit counter advanced by 1 equal(self.uuid._last_counter_v7, counter + 1) equal((u.int >> 64) & 0xfff, counter_hi) - # counter advanced by 1 (constructed so that counter_hi is unchanged) - equal((u.int >> 32) & 0x3fffffff, counter_lo + 1) - equal(u.int & 0xffffffff, tail) + # 42-bit counter advanced by 1 (counter_hi is untouched) + equal((u.int >> 32) & 0x3fff_ffff, counter_lo + 1) + equal(u.int & 0xffff_ffff, tail) def test_uuid7_overflow_counter(self): equal = self.assertEqual @@ -846,9 +858,12 @@ def test_uuid7_overflow_counter(self): random_data = random_bits.to_bytes(10) with ( - mock.patch.object(self.uuid, '_last_timestamp_v7', timestamp_ms), - # same timestamp, but force an overflow on the counter - mock.patch.object(self.uuid, '_last_counter_v7', 0x3ffffffffff), + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=timestamp_ms, + # same timestamp, but force an overflow on the counter + _last_counter_v7=0x3ff_ffff_ffff, + ), mock.patch('time.time_ns', return_value=timestamp_ns), mock.patch('os.urandom', return_value=random_data) as urand ): @@ -858,13 +873,13 @@ def test_uuid7_overflow_counter(self): equal(u.version, 7) # timestamp advanced due to overflow equal(self.uuid._last_timestamp_v7, timestamp_ms + 1) - unix_ts_ms = (timestamp_ms + 1) & 0xffffffffffff - equal((u.int >> 80) & 0xffffffffffff, unix_ts_ms) - # counter overflow, so we picked a new one + unix_ts_ms = (timestamp_ms + 1) & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) + # counter overflowed, so we picked a new one equal(self.uuid._last_counter_v7, new_counter) equal((u.int >> 64) & 0xfff, new_counter_hi) - equal((u.int >> 32) & 0x3fffffff, new_counter_lo) - equal(u.int & 0xffffffff, tail) + equal((u.int >> 32) & 0x3fff_ffff, new_counter_lo) + equal(u.int & 0xffff_ffff, tail) def test_uuid8(self): equal = self.assertEqual From 6fcb6a10f64ba0b1c20a6f7cba716147e806af3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 09:49:06 +0100 Subject: [PATCH 25/29] fix lint --- Lib/test/test_uuid.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 0997e16f0ba41f..520d187ce991c9 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -1,7 +1,3 @@ -import random -import unittest -from test import support -from test.support import import_helper import builtins import contextlib import copy @@ -12,9 +8,13 @@ import random import sys import weakref +import unittest from itertools import product from unittest import mock +from test import support +from test.support import import_helper + py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid']) c_uuid = import_helper.import_fresh_module('uuid', fresh=['_uuid']) From be3f0243069d38cc62354902934e4d3dd33e058a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 09:51:43 +0100 Subject: [PATCH 26/29] post-merge --- Lib/test/test_uuid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 520d187ce991c9..f3dae65c0007be 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -7,8 +7,8 @@ import pickle import random import sys -import weakref import unittest +import weakref from itertools import product from unittest import mock From 06befcaf8fe4f7cbffb6bb59c75939bf8cadf8ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:30:59 +0100 Subject: [PATCH 27/29] use versionchanged instead of versionadded --- Doc/library/uuid.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index dfc664efbfe27b..bba346b1e45f58 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -333,7 +333,7 @@ The following options are accepted: Specify the function name to use to generate the uuid. By default :func:`uuid4` is used. - .. versionadded:: next + .. versionchanged:: next Allow generating UUID versions 7 and 8. .. option:: -n From aee2898bab2928f01df17e35246094bd10951c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Dec 2024 18:00:26 +0100 Subject: [PATCH 28/29] improve UUIDv7 tests readability --- Lib/test/test_uuid.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index f3dae65c0007be..6361818f9cf669 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -704,7 +704,7 @@ def test_uuid7(self): random_bits = counter << 32 | tail # set all remaining MSB of fake random bits to 1 to ensure that - # the implementation correctly remove them + # the implementation correctly removes them random_bits = (((1 << 7) - 1) << 73) | random_bits random_data = random_bits.to_bytes(10) @@ -754,7 +754,7 @@ def test_uuid7_monotonicity(self): with mock.patch.multiple( self.uuid, _last_timestamp_v7=0, - _last_counter_v7=0 + _last_counter_v7=0, ): # 1 Jan 2023 12:34:56.123_456_789 timestamp_ns = 1672533296_123_456_789 # ns precision @@ -764,6 +764,7 @@ def test_uuid7_monotonicity(self): counter_hi = random.getrandbits(11) counter_lo = random.getrandbits(29) counter = (counter_hi << 30) | counter_lo + self.assertLess(counter + 1, 0x3ff_ffff_ffff) tail = random.getrandbits(32) random_bits = counter << 32 | tail @@ -816,6 +817,7 @@ def test_uuid7_timestamp_backwards(self): counter_hi = random.getrandbits(11) counter_lo = random.getrandbits(29) counter = (counter_hi << 30) | counter_lo + self.assertLess(counter + 1, 0x3ff_ffff_ffff) tail_bytes = os.urandom(4) tail = int.from_bytes(tail_bytes) @@ -854,7 +856,7 @@ def test_uuid7_overflow_counter(self): new_counter = (new_counter_hi << 30) | new_counter_lo tail = random.getrandbits(32) - random_bits = new_counter << 32 | tail + random_bits = (new_counter << 32) | tail random_data = random_bits.to_bytes(10) with ( From 1a5ac19825deca8f81ac43aca20ceb3bce1af207 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Dec 2024 18:14:48 +0100 Subject: [PATCH 29/29] improve UUIDv7 uniqueness tests --- Lib/test/test_uuid.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 6361818f9cf669..ca84895be95f75 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -741,9 +741,13 @@ def test_uuid7_uniqueness(self): # uses os.urandom() as a PRNG which features better randomness. # # Until reaching UNIX_EPOCH + 10'000 years, the probability for - # generating two identical UUIDs is negligilbe. - uuids = {self.uuid.uuid7() for _ in range(1000)} - self.assertEqual(len(uuids), 1000) + # generating two identical UUIDs is negligible. + N = 1000 + uuids = {self.uuid.uuid7() for _ in range(N)} + self.assertEqual(len(uuids), N) + + versions = {u.version for u in uuids} + self.assertSetEqual(versions, {7}) def test_uuid7_monotonicity(self): equal = self.assertEqual