diff --git a/ocaml/libs/uuid/dune b/ocaml/libs/uuid/dune index 5f7c5c25b95..b907ee5e9d5 100644 --- a/ocaml/libs/uuid/dune +++ b/ocaml/libs/uuid/dune @@ -3,7 +3,12 @@ (public_name uuid) (modules uuidx) (libraries - unix (re_export uuidm) + mtime + mtime.clock.os + ptime + ptime.clock.os + unix + (re_export uuidm) ) (wrapped false) ) @@ -12,5 +17,5 @@ (name uuid_test) (package uuid) (modules uuid_test) - (libraries alcotest fmt uuid) + (libraries alcotest fmt ptime uuid) ) diff --git a/ocaml/libs/uuid/uuid_test.ml b/ocaml/libs/uuid/uuid_test.ml index dbaf294545f..69db7a58bef 100644 --- a/ocaml/libs/uuid/uuid_test.ml +++ b/ocaml/libs/uuid/uuid_test.ml @@ -25,6 +25,67 @@ let uuid_arrays = let non_uuid_arrays = [[|0|]; [|0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14|]] +let uuid_v4_cases = + [ + ((0L, 0L), "00000000-0000-4000-8000-000000000000") + ; ((-1L, -1L), "ffffffff-ffff-4fff-bfff-ffffffffffff") + ; ((0x89ab_cdefL, 0x0123_4567L), "00000000-89ab-4def-8000-000001234567") + ] + +let uuid_v7_times = + let of_ms ms = Ptime.Span.of_float_s (ms /. 1000.0) |> Option.get in + let power_of_2_ms n = + Float.pow 2.0 (Float.of_int n) |> of_ms |> Ptime.Span.truncate ~frac_s:3 + in + let zero = of_ms 0.0 in + let ms = of_ms 1.0 in + let ps = Ptime.Span.of_d_ps (0, 1L) |> Option.get in + (* Using RFC9562 "method 3" for representiong sub-millisecond fractions, + that smallest amount of time a v7 UUID can represent is 1 / 4096 ms, + which is (just less than) 244141 picoseconds *) + let tick = Ptime.Span.of_d_ps (0, 244141L) |> Option.get in + let ( - ) = Ptime.Span.sub in + let to_d_ps = Ptime.Span.to_d_ps in + [ + (zero |> to_d_ps, "00000000-0000-7000-8000-000000000000") + ; (tick |> to_d_ps, "00000000-0000-7001-8000-000000000000") + ; (ms |> to_d_ps, "00000000-0001-7000-8000-000000000000") + ; (ms - ps |> to_d_ps, "00000000-0000-7fff-8000-000000000000") + (* Test a wide range of dates - but we can't get much bigger than + epoch + 2^47 milliseconds, and that puts us in the year 6429 and Ptime + only allows dates up to the year 9999 *) + ; (power_of_2_ms 05 |> to_d_ps, "00000000-0020-7000-8000-000000000000") + ; (power_of_2_ms 15 |> to_d_ps, "00000000-8000-7000-8000-000000000000") + ; (power_of_2_ms 25 |> to_d_ps, "00000200-0000-7000-8000-000000000000") + ; (power_of_2_ms 35 |> to_d_ps, "00080000-0000-7000-8000-000000000000") + ; (power_of_2_ms 45 |> to_d_ps, "20000000-0000-7000-8000-000000000000") + ; (power_of_2_ms 47 |> to_d_ps, "80000000-0000-7000-8000-000000000000") + ; (power_of_2_ms 47 - ps |> to_d_ps, "7fffffff-ffff-7fff-8000-000000000000") + ] + +let uuid_v7_adjustments = + let ( * ) = Int64.mul in + let ( + ) = Int64.add in + let ( / ) = Int64.div in + let ps_in_ns = 1_000L in + let ns_in_ms = 1_000_000L in + let ps_in_ms = ps_in_ns * ns_in_ms in + let ns_in_day = ns_in_ms * 1000L * 60L * 60L * 24L in + [ + (0L, (0, 0L)) + ; (ns_in_ms / 2L, (0, ps_in_ms / 2L)) + ; (ns_in_ms, (0, ps_in_ms)) + ; (ns_in_day, (1, 0L)) + ; (ns_in_day + (ns_in_ms / 4L), (1, ps_in_ms / 4L)) + ] + +let uuid_v7_bytes = + [ + (1L, "00000000-0000-7000-8000-000000000001") + ; (-1L, "00000000-0000-7000-bfff-ffffffffffff") + ; (0x1234_5678_9abc_def0L, "00000000-0000-7000-9234-56789abcdef0") + ] + type resource let uuid_testable : (module Alcotest.TESTABLE with type t = resource Uuidx.t) = @@ -51,6 +112,62 @@ let roundtrip_tests testing_uuid = ; ("Roundtrip array conversion", `Quick, test_array) ] +let uuid_v4_tests ((upper, lower), expected_as_string) = + let expected = + match Uuidx.of_string expected_as_string with + | Some uuid -> + uuid + | None -> + Alcotest.fail + (Printf.sprintf "Couldn't convert to UUID: %s" expected_as_string) + in + let test () = + let result = Uuidx.make_v4_uuid upper lower in + Alcotest.(check @@ uuid_testable) "make UUIDv4" expected result + in + (expected_as_string, [("Make UUIDv4 from bytes", `Quick, test)]) + +let uuid_v7_time_tests (t, expected_as_string) = + let expected = + match Uuidx.of_string expected_as_string with + | Some uuid -> + uuid + | None -> + Alcotest.fail + (Printf.sprintf "Couldn't convert to UUID: %s" expected_as_string) + in + let test () = + let result = Uuidx.make_v7_uuid_from_parts t 0L 0L in + Alcotest.(check @@ uuid_testable) "make UUIDv7" expected result + in + (expected_as_string, [("Make UUIDv7 from time", `Quick, test)]) + +let uuid_v7_adjustment_tests (nanos, (days, picos)) = + let day0 = 20000 in + let bs = 0x01234_5678_9abc_defL in + let expected = Uuidx.make_v7_uuid_from_parts (day0 + days, picos) 0L bs in + let expected_as_string = Uuidx.to_string expected in + let test () = + let result = Uuidx.make_v7_uuid_from_parts (day0, 0L) nanos bs in + Alcotest.(check @@ uuid_testable) "make UUIDv7" expected result + in + (expected_as_string, [("Make UUIDv7 with nano adjustment", `Quick, test)]) + +let uuid_v7_bytes_tests (bs, expected_as_string) = + let expected = + match Uuidx.of_string expected_as_string with + | Some uuid -> + uuid + | None -> + Alcotest.fail + (Printf.sprintf "Couldn't convert to UUID: %s" expected_as_string) + in + let test () = + let result = Uuidx.make_v7_uuid_from_parts (0, 0L) 0L bs in + Alcotest.(check @@ uuid_testable) "make UUIDv7" expected result + in + (expected_as_string, [("Make UUIDv7 from bytes", `Quick, test)]) + let string_roundtrip_tests testing_string = let testing_uuid = match Uuidx.of_string testing_string with @@ -111,6 +228,10 @@ let regression_tests = ; List.map array_roundtrip_tests uuid_arrays ; List.map invalid_string_tests non_uuid_strings ; List.map invalid_array_tests non_uuid_arrays + ; List.map uuid_v4_tests uuid_v4_cases + ; List.map uuid_v7_time_tests uuid_v7_times + ; List.map uuid_v7_adjustment_tests uuid_v7_adjustments + ; List.map uuid_v7_bytes_tests uuid_v7_bytes ] let () = Alcotest.run "Uuid" regression_tests diff --git a/ocaml/libs/uuid/uuidx.ml b/ocaml/libs/uuid/uuidx.ml index 01dbda46899..0b425ca77d0 100644 --- a/ocaml/libs/uuid/uuidx.ml +++ b/ocaml/libs/uuid/uuidx.ml @@ -53,11 +53,79 @@ let read_bytes dev n = if read <> n then raise End_of_file else - Bytes.to_string buf + buf ) (fun () -> Unix.close fd) -let make_uuid_urnd () = of_bytes (read_bytes dev_urandom 16) |> Option.get +let set_bits buf i value mask = + let old_octet = Bytes.get_uint8 buf i in + let new_octet = lnot mask land old_octet lor (mask land value) in + Bytes.set_uint8 buf i new_octet + +let set_var_and_ver ver buf = + (* Sets the 2-bit variant field and the 4-bit version field (see RFC 9562 + sections 4.1 and 4.2 respectively) into a 16 byte buffer representing a + UUID. *) + set_bits buf 8 (2 lsl 6) (0x3 lsl 6) ; + set_bits buf 6 (ver lsl 4) (0xf lsl 4) ; + () + +let of_buf buf = of_bytes (buf |> Bytes.to_string) |> Option.get + +let make_uuid_urnd () = + let buf = read_bytes dev_urandom 16 in + set_var_and_ver 4 buf ; of_buf buf + +let make_v4_uuid upper lower = + let buf = Bytes.create 16 in + Bytes.set_int64_be buf 0 upper ; + Bytes.set_int64_be buf 8 lower ; + set_var_and_ver 4 buf ; + of_buf buf + +let d_ps_ns_to_ms_ps = + let ms_in_day = 86_400_000L in + let ps_in_ms = 1_000_000_000L in + let ps_in_ns = 1_000L in + let ( + ) = Int64.add in + let ( * ) = Int64.mul in + let ( / ) = Int64.div in + let ( mod ) = Int64.rem in + fun days ps ns_adj -> + let ps_total = ps + (ps_in_ns * ns_adj) in + let ms = (ms_in_day * Int64.of_int days) + (ps_total / ps_in_ms) in + let ps = ps_total mod ps_in_ms in + (ms, ps) + +let make_v7_uuid_from_parts = + let ( * ) = Int64.mul in + let ( lsl ) = Int64.shift_left in + let ( lsr ) = Int64.shift_right_logical in + fun (days, picos) nano_adjustment rand_b -> + let buf = Bytes.create 16 in + let ms, ps = d_ps_ns_to_ms_ps days picos nano_adjustment in + (* We are using 12 bits to contain a sub-millisecond fraction, so we want + to converted the remaindered number of picoseconds into the range 0 to + 4096. Given there are 10^9 picoseconds in a millisecond, multiplying by + (2^63 / 10^9) converts into the range 0 - 2^63-1, which we can shift to + give the 12-bit value we want. *) + let sub_ms_frac = (ps * 9_223_372_037L) lsr 51 |> Int64.to_int in + Bytes.set_int64_be buf 0 (ms lsl 16) ; + Bytes.set_int16_be buf 6 sub_ms_frac ; + Bytes.set_int64_be buf 8 rand_b ; + set_var_and_ver 7 buf ; + of_buf buf + +let make_rand64 () = + let buf = read_bytes dev_urandom 8 in + Bytes.get_int64_ne buf 0 + +let make_v7_uuid = + let start = Mtime_clock.counter () in + let t0 = Ptime_clock.now () |> Ptime.to_span |> Ptime.Span.to_d_ps in + fun () -> + let ns_since_t0 = Mtime_clock.count start |> Mtime.Span.to_uint64_ns in + make_v7_uuid_from_parts t0 ns_since_t0 (make_rand64 ()) (* Use the CSPRNG-backed urandom *) let make = make_uuid_urnd @@ -66,7 +134,7 @@ type cookie = string let make_cookie () = read_bytes dev_urandom 64 - |> String.to_seq + |> Bytes.to_seq |> Seq.map (fun c -> Printf.sprintf "%1x" (int_of_char c)) |> List.of_seq |> String.concat "" diff --git a/ocaml/libs/uuid/uuidx.mli b/ocaml/libs/uuid/uuidx.mli index 618235b4ae6..26e80327557 100644 --- a/ocaml/libs/uuid/uuidx.mli +++ b/ocaml/libs/uuid/uuidx.mli @@ -27,14 +27,27 @@ type 'a t val null : 'a t -(** A null UUID, as if such a thing actually existed. It turns out to be - useful though. *) +(** A null UUID, as defined in RFC 9562 5.9. *) val make : unit -> 'a t (** Create a fresh UUID *) val make_uuid_urnd : unit -> 'a t +val make_v4_uuid : int64 -> int64 -> 'a t +(** For testing only: Create a v4 UUID, as defined in RFC 9562 5.4 *) + +val make_v7_uuid_from_parts : int * int64 -> int64 -> int64 -> 'a t +(** For testing only: create a v7 UUID, as defined in RFC 9562 5.7 *) + +val make_v7_uuid : unit -> 'a t +(** Create a fresh v7 UUID, as defined in RFC 9562 5.7. This incorporates a + POSIX timestamp, such that the alphabetic of any two such UUIDs will match + the timestamp order - provided that they are at least 245 nanoseconds + apart. Note that in order to ensure that the timestamps used are + monotonic, operating time adjustments are ignored and hence timestamps + only approximate system time. *) + val pp : Format.formatter -> 'a t -> unit val equal : 'a t -> 'a t -> bool