Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IH-577 Implement v7 UUID generation #5866

Merged
merged 1 commit into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion ocaml/libs/uuid/dune
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@
(public_name uuid)
(modules uuidx)
(libraries
unix (re_export uuidm)
mtime
mtime.clock.os
Copy link

@dbuenzli dbuenzli Oct 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that both {ptime,mtime}.clock.os are deprecated use {ptime,mtime}.clock if you are using the latest version (also clocks are made to re-export the base package so you can also specify only those if you wish so). See https://discuss.ocaml.org/t/ann-ptime-1-2-0-mtime-2-1-0-qrc-0-2-0/15268

ptime
ptime.clock.os
threads.posix
unix
(re_export uuidm)
)
(wrapped false)
)
Expand Down
72 changes: 72 additions & 0 deletions ocaml/libs/uuid/uuid_test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,46 @@ let uuid_arrays =
let non_uuid_arrays =
[[|0|]; [|0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14|]]

let uuid_v7_times =
let of_ms ms = Int64.mul 1_000_000L (Int64.of_float ms) in
let power_of_2_ms n = Float.pow 2.0 (Float.of_int n) |> of_ms in
let zero = 0L in
let ms = 1_000_000L in
let ns = 1L in
(* Using RFC9562 "method 3" for representiong sub-millisecond fractions,
that smallest amount of time a v7 UUID can represent is 1 / 4096 ms,
which is (just more than) 244 nanoseconds *)
let tick = 245L in
let ( + ) = Int64.add in
let ( - ) = Int64.sub in
[
(zero, "00000000-0000-7000-8000-000000000000")
; (tick, "00000000-0000-7001-8000-000000000000")
; (ms, "00000000-0001-7000-8000-000000000000")
; (ms - ns, "00000000-0000-7fff-8000-000000000000")
(* Test a wide range of dates - however, we can't express dates of
beyond epoch + (2^64 - 1) nanoseconds, which is about approximately
epoch + 2^44 milliseconds - some point in the 26th century *)
; (power_of_2_ms 05, "00000000-0020-7000-8000-000000000000")
; (power_of_2_ms 10, "00000000-0400-7000-8000-000000000000")
; (power_of_2_ms 15, "00000000-8000-7000-8000-000000000000")
; (power_of_2_ms 20, "00000010-0000-7000-8000-000000000000")
; (power_of_2_ms 25, "00000200-0000-7000-8000-000000000000")
; (power_of_2_ms 30, "00004000-0000-7000-8000-000000000000")
; (power_of_2_ms 35, "00080000-0000-7000-8000-000000000000")
; (power_of_2_ms 40, "01000000-0000-7000-8000-000000000000")
; (power_of_2_ms 44, "10000000-0000-7000-8000-000000000000")
; (power_of_2_ms 44 - ns, "0fffffff-ffff-7fff-8000-000000000000")
; (power_of_2_ms 44 + tick, "10000000-0000-7001-8000-000000000000")
]

let uuid_v7_bytes =
[
(1L, "00000000-0000-7000-8000-000000000001")
; (-1L, "00000000-0000-7000-bfff-ffffffffffff")
; (0x1234_5678_9abc_def0L, "00000000-0000-7000-9234-56789abcdef0")
]

type resource = [`Generic]

let uuid_testable : (module Alcotest.TESTABLE with type t = resource Uuidx.t) =
Expand All @@ -51,6 +91,36 @@ let roundtrip_tests testing_uuid =
; ("Roundtrip array conversion", `Quick, test_array)
]

let uuid_v7_time_tests (t, expected_as_string) =
let expected =
match Uuidx.of_string expected_as_string with
| Some uuid ->
uuid
| None ->
Alcotest.fail
(Printf.sprintf "Couldn't convert to UUID: %s" expected_as_string)
in
let test () =
let result = Uuidx.make_v7_uuid_from_parts t 0L in
Alcotest.(check @@ uuid_testable) "make UUIDv7" expected result
in
(expected_as_string, [("Make UUIDv7 from time", `Quick, test)])

let uuid_v7_bytes_tests (rand_b, expected_as_string) =
let expected =
match Uuidx.of_string expected_as_string with
| Some uuid ->
uuid
| None ->
Alcotest.fail
(Printf.sprintf "Couldn't convert to UUID: %s" expected_as_string)
in
let test () =
let result = Uuidx.make_v7_uuid_from_parts 0L rand_b in
Alcotest.(check @@ uuid_testable) "make UUIDv7" expected result
in
(expected_as_string, [("Make UUIDv7 from bytes", `Quick, test)])

let string_roundtrip_tests testing_string =
let testing_uuid =
match Uuidx.of_string testing_string with
Expand Down Expand Up @@ -111,6 +181,8 @@ let regression_tests =
; List.map array_roundtrip_tests uuid_arrays
; List.map invalid_string_tests non_uuid_strings
; List.map invalid_array_tests non_uuid_arrays
; List.map uuid_v7_time_tests uuid_v7_times
; List.map uuid_v7_bytes_tests uuid_v7_bytes
]

let () = Alcotest.run "Uuid" regression_tests
39 changes: 32 additions & 7 deletions ocaml/libs/uuid/uuidx.ml
Original file line number Diff line number Diff line change
Expand Up @@ -131,21 +131,46 @@ let read_bytes dev n =

let make_uuid_urnd () = of_bytes (read_bytes dev_urandom_fd 16) |> Option.get

(** Use non-CSPRNG by default, for CSPRNG see {!val:make_uuid_urnd} *)
let make_uuid_fast =
let uuid_state = Random.State.make_self_init () in
(* State for random number generation. Random.State.t isn't thread safe, so
only use this via with_non_csprng_state, which takes care of this.
*)
let rstate = Random.State.make_self_init ()

let rstate_m = Mutex.create ()

let with_non_csprng_state =
(* On OCaml 5 we could use Random.State.split instead,
and on OCaml 4 the mutex may not be strictly needed
*)
let m = Mutex.create () in
let finally () = Mutex.unlock m in
let gen = Uuidm.v4_gen uuid_state in
fun () -> Mutex.lock m ; Fun.protect ~finally gen
let finally () = Mutex.unlock rstate_m in
fun f ->
Mutex.lock rstate_m ;
Fun.protect ~finally (f rstate)

(** Use non-CSPRNG by default, for CSPRNG see {!val:make_uuid_urnd} *)
let make_uuid_fast () = with_non_csprng_state Uuidm.v4_gen

let make_default = ref make_uuid_urnd

let make () = !make_default ()

let make_v7_uuid_from_parts time_ns rand_b = Uuidm.v7_ns ~time_ns ~rand_b

let rand64 () =
with_non_csprng_state (fun rstate () -> Random.State.bits64 rstate)

let now_ns =
let start = Mtime_clock.counter () in
let t0 =
let d, ps = Ptime_clock.now () |> Ptime.to_span |> Ptime.Span.to_d_ps in
Int64.(add (mul (of_int d) 86_400_000_000_000L) (div ps 1000L))
in
fun () ->
let since_t0 = Mtime_clock.count start |> Mtime.Span.to_uint64_ns in
Int64.add t0 since_t0

let make_v7_uuid () = make_v7_uuid_from_parts (now_ns ()) (rand64 ())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are only 2048 possible valued for the rand64, how are we avoiding duplicates?

In general, I'd prefer to use a counter here, it would ensure that no duplicates are ever produced, and that sequentially-created uuids are properly ordered.

However, the rates needed for this to happen are rather high, and I doubt this affects how the toolstack will use the uuids, this can be changed at a later time if we found it's an impediment.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are only 2048 possible valued for the rand64, how are we avoiding duplicates?

I'd been assuming that Random.State.bits64 would give 2^64 different values. We're only using 62 of its bits, but that still gives us about 4 * 10^18 different values, which I would have thought we could rely on chance to avoid getting duplicate values within 245 nanoseconds of each other. Or is the Random module very much worse than I'd imagined?


type cookie = string

let make_cookie () =
Expand Down
14 changes: 12 additions & 2 deletions ocaml/libs/uuid/uuidx.mli
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,7 @@ type all = [without_secret | secret]
type 'a t = Uuidm.t constraint 'a = [< all]

val null : [< not_secret] t
(** A null UUID, as if such a thing actually existed. It turns out to be
useful though. *)
(** A null UUID, as defined in RFC 9562 5.9. *)

val make : unit -> [< not_secret] t
(** Create a fresh UUID *)
Expand All @@ -130,6 +129,17 @@ val make_uuid_fast : unit -> [< not_secret] t
Don't use this to generate secrets, see {!val:make_uuid_urnd} for that instead.
*)

val make_v7_uuid_from_parts : int64 -> int64 -> [< not_secret] t
(** For testing only: create a v7 UUID, as defined in RFC 9562 5.7 *)

val make_v7_uuid : unit -> [< not_secret] t
(** Create a fresh v7 UUID, as defined in RFC 9562 5.7. This incorporates a
POSIX timestamp, such that the alphabetic of any two such UUIDs will match
the timestamp order - provided that they are at least 245 nanoseconds
apart. Note that in order to ensure that the timestamps used are
monotonic, operating time adjustments are ignored and hence timestamps
only approximate system time. *)

val pp : Format.formatter -> [< not_secret] t -> unit

val equal : 'a t -> 'a t -> bool
Expand Down
Loading