Skip to content

Commit

Permalink
IH-577 Implement v7 UUID generation
Browse files Browse the repository at this point in the history
* New function Uuidx.make_v7_uuid, with the idea being that ordering v7
  UUIDs alphabetically will also order them by creation time
* The values produced by Uuidx.make_uuid_urnd hadn't necessarily been
  valid UUIDs, since the variant and version fields were being filled in
  randomly - this is now fixed so that it returns v4 UUIDs.
* There are a couple of functions for generating v4 and v7 from known
  inputs, for the purpose of unit testing. (The v4 function is mainly
  there so I could check the setting of variant and version fields by
  comparing the output with that which Python's UUID module produces.)

Signed-off-by: Robin Newton <[email protected]>
  • Loading branch information
Robin Newton committed Aug 9, 2024
1 parent a41c3fe commit 037eef9
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 7 deletions.
9 changes: 7 additions & 2 deletions ocaml/libs/uuid/dune
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
(public_name uuid)
(modules uuidx)
(libraries
unix (re_export uuidm)
mtime
mtime.clock.os
ptime
ptime.clock.os
unix
(re_export uuidm)
)
(wrapped false)
)
Expand All @@ -12,5 +17,5 @@
(name uuid_test)
(package uuid)
(modules uuid_test)
(libraries alcotest fmt uuid)
(libraries alcotest fmt ptime uuid)
)
121 changes: 121 additions & 0 deletions ocaml/libs/uuid/uuid_test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,67 @@ let uuid_arrays =
let non_uuid_arrays =
[[|0|]; [|0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14|]]

let uuid_v4_cases =
[
((0L, 0L), "00000000-0000-4000-8000-000000000000")
; ((-1L, -1L), "ffffffff-ffff-4fff-bfff-ffffffffffff")
; ((0x89ab_cdefL, 0x0123_4567L), "00000000-89ab-4def-8000-000001234567")
]

let uuid_v7_times =
let of_ms ms = Ptime.Span.of_float_s (ms /. 1000.0) |> Option.get in
let power_of_2_ms n =
Float.pow 2.0 (Float.of_int n) |> of_ms |> Ptime.Span.truncate ~frac_s:3
in
let zero = of_ms 0.0 in
let ms = of_ms 1.0 in
let ps = Ptime.Span.of_d_ps (0, 1L) |> Option.get in
(* Using RFC9562 "method 3" for representiong sub-millisecond fractions,
that smallest amount of time a v7 UUID can represent is 1 / 4096 ms,
which is (just less than) 244141 picoseconds *)
let tick = Ptime.Span.of_d_ps (0, 244141L) |> Option.get in
let ( - ) = Ptime.Span.sub in
let to_d_ps = Ptime.Span.to_d_ps in
[
(zero |> to_d_ps, "00000000-0000-7000-8000-000000000000")
; (tick |> to_d_ps, "00000000-0000-7001-8000-000000000000")
; (ms |> to_d_ps, "00000000-0001-7000-8000-000000000000")
; (ms - ps |> to_d_ps, "00000000-0000-7fff-8000-000000000000")
(* Test a wide range of dates - but we can't get much bigger than
epoch + 2^47 milliseconds, and that puts us in the year 6429 and Ptime
only allows dates up to the year 9999 *)
; (power_of_2_ms 05 |> to_d_ps, "00000000-0020-7000-8000-000000000000")
; (power_of_2_ms 15 |> to_d_ps, "00000000-8000-7000-8000-000000000000")
; (power_of_2_ms 25 |> to_d_ps, "00000200-0000-7000-8000-000000000000")
; (power_of_2_ms 35 |> to_d_ps, "00080000-0000-7000-8000-000000000000")
; (power_of_2_ms 45 |> to_d_ps, "20000000-0000-7000-8000-000000000000")
; (power_of_2_ms 47 |> to_d_ps, "80000000-0000-7000-8000-000000000000")
; (power_of_2_ms 47 - ps |> to_d_ps, "7fffffff-ffff-7fff-8000-000000000000")
]

let uuid_v7_adjustments =
let ( * ) = Int64.mul in
let ( + ) = Int64.add in
let ( / ) = Int64.div in
let ps_in_ns = 1_000L in
let ns_in_ms = 1_000_000L in
let ps_in_ms = ps_in_ns * ns_in_ms in
let ns_in_day = ns_in_ms * 1000L * 60L * 60L * 24L in
[
(0L, (0, 0L))
; (ns_in_ms / 2L, (0, ps_in_ms / 2L))
; (ns_in_ms, (0, ps_in_ms))
; (ns_in_day, (1, 0L))
; (ns_in_day + (ns_in_ms / 4L), (1, ps_in_ms / 4L))
]

let uuid_v7_bytes =
[
(1L, "00000000-0000-7000-8000-000000000001")
; (-1L, "00000000-0000-7000-bfff-ffffffffffff")
; (0x1234_5678_9abc_def0L, "00000000-0000-7000-9234-56789abcdef0")
]

type resource

let uuid_testable : (module Alcotest.TESTABLE with type t = resource Uuidx.t) =
Expand All @@ -51,6 +112,62 @@ let roundtrip_tests testing_uuid =
; ("Roundtrip array conversion", `Quick, test_array)
]

let uuid_v4_tests ((upper, lower), expected_as_string) =
let expected =
match Uuidx.of_string expected_as_string with
| Some uuid ->
uuid
| None ->
Alcotest.fail
(Printf.sprintf "Couldn't convert to UUID: %s" expected_as_string)
in
let test () =
let result = Uuidx.make_v4_uuid upper lower in
Alcotest.(check @@ uuid_testable) "make UUIDv4" expected result
in
(expected_as_string, [("Make UUIDv4 from bytes", `Quick, test)])

let uuid_v7_time_tests (t, expected_as_string) =
let expected =
match Uuidx.of_string expected_as_string with
| Some uuid ->
uuid
| None ->
Alcotest.fail
(Printf.sprintf "Couldn't convert to UUID: %s" expected_as_string)
in
let test () =
let result = Uuidx.make_v7_uuid_from_parts t 0L 0L in
Alcotest.(check @@ uuid_testable) "make UUIDv7" expected result
in
(expected_as_string, [("Make UUIDv7 from time", `Quick, test)])

let uuid_v7_adjustment_tests (nanos, (days, picos)) =
let day0 = 20000 in
let bs = 0x01234_5678_9abc_defL in
let expected = Uuidx.make_v7_uuid_from_parts (day0 + days, picos) 0L bs in
let expected_as_string = Uuidx.to_string expected in
let test () =
let result = Uuidx.make_v7_uuid_from_parts (day0, 0L) nanos bs in
Alcotest.(check @@ uuid_testable) "make UUIDv7" expected result
in
(expected_as_string, [("Make UUIDv7 with nano adjustment", `Quick, test)])

let uuid_v7_bytes_tests (bs, expected_as_string) =
let expected =
match Uuidx.of_string expected_as_string with
| Some uuid ->
uuid
| None ->
Alcotest.fail
(Printf.sprintf "Couldn't convert to UUID: %s" expected_as_string)
in
let test () =
let result = Uuidx.make_v7_uuid_from_parts (0, 0L) 0L bs in
Alcotest.(check @@ uuid_testable) "make UUIDv7" expected result
in
(expected_as_string, [("Make UUIDv7 from bytes", `Quick, test)])

let string_roundtrip_tests testing_string =
let testing_uuid =
match Uuidx.of_string testing_string with
Expand Down Expand Up @@ -111,6 +228,10 @@ let regression_tests =
; List.map array_roundtrip_tests uuid_arrays
; List.map invalid_string_tests non_uuid_strings
; List.map invalid_array_tests non_uuid_arrays
; List.map uuid_v4_tests uuid_v4_cases
; List.map uuid_v7_time_tests uuid_v7_times
; List.map uuid_v7_adjustment_tests uuid_v7_adjustments
; List.map uuid_v7_bytes_tests uuid_v7_bytes
]

let () = Alcotest.run "Uuid" regression_tests
74 changes: 71 additions & 3 deletions ocaml/libs/uuid/uuidx.ml
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,79 @@ let read_bytes dev n =
if read <> n then
raise End_of_file
else
Bytes.to_string buf
buf
)
(fun () -> Unix.close fd)

let make_uuid_urnd () = of_bytes (read_bytes dev_urandom 16) |> Option.get
let set_bits buf i value mask =
let old_octet = Bytes.get_uint8 buf i in
let new_octet = lnot mask land old_octet lor (mask land value) in
Bytes.set_uint8 buf i new_octet

let set_var_and_ver ver buf =
(* Sets the 2-bit variant field and the 4-bit version field (see RFC 9562
sections 4.1 and 4.2 respectively) into a 16 byte buffer representing a
UUID. *)
set_bits buf 8 (2 lsl 6) (0x3 lsl 6) ;
set_bits buf 6 (ver lsl 4) (0xf lsl 4) ;
()

let of_buf buf = of_bytes (buf |> Bytes.to_string) |> Option.get

let make_uuid_urnd () =
let buf = read_bytes dev_urandom 16 in
set_var_and_ver 4 buf ; of_buf buf

let make_v4_uuid upper lower =
let buf = Bytes.create 16 in
Bytes.set_int64_be buf 0 upper ;
Bytes.set_int64_be buf 8 lower ;
set_var_and_ver 4 buf ;
of_buf buf

let d_ps_ns_to_ms_ps =
let ms_in_day = 86_400_000L in
let ps_in_ms = 1_000_000_000L in
let ps_in_ns = 1_000L in
let ( + ) = Int64.add in
let ( * ) = Int64.mul in
let ( / ) = Int64.div in
let ( mod ) = Int64.rem in
fun days ps ns_adj ->
let ps_total = ps + (ps_in_ns * ns_adj) in
let ms = (ms_in_day * Int64.of_int days) + (ps_total / ps_in_ms) in
let ps = ps_total mod ps_in_ms in
(ms, ps)

let make_v7_uuid_from_parts =
let ( * ) = Int64.mul in
let ( lsl ) = Int64.shift_left in
let ( lsr ) = Int64.shift_right_logical in
fun (days, picos) nano_adjustment rand_b ->
let buf = Bytes.create 16 in
let ms, ps = d_ps_ns_to_ms_ps days picos nano_adjustment in
(* We are using 12 bits to contain a sub-millisecond fraction, so we want
to converted the remaindered number of picoseconds into the range 0 to
4096. Given there are 10^9 picoseconds in a millisecond, multiplying by
(2^63 / 10^9) converts into the range 0 - 2^63-1, which we can shift to
give the 12-bit value we want. *)
let sub_ms_frac = (ps * 9_223_372_037L) lsr 51 |> Int64.to_int in
Bytes.set_int64_be buf 0 (ms lsl 16) ;
Bytes.set_int16_be buf 6 sub_ms_frac ;
Bytes.set_int64_be buf 8 rand_b ;
set_var_and_ver 7 buf ;
of_buf buf

let make_rand64 () =
let buf = read_bytes dev_urandom 8 in
Bytes.get_int64_ne buf 0

let make_v7_uuid =
let start = Mtime_clock.counter () in
let t0 = Ptime_clock.now () |> Ptime.to_span |> Ptime.Span.to_d_ps in
fun () ->
let ns_since_t0 = Mtime_clock.count start |> Mtime.Span.to_uint64_ns in
make_v7_uuid_from_parts t0 ns_since_t0 (make_rand64 ())

(* Use the CSPRNG-backed urandom *)
let make = make_uuid_urnd
Expand All @@ -66,7 +134,7 @@ type cookie = string

let make_cookie () =
read_bytes dev_urandom 64
|> String.to_seq
|> Bytes.to_seq
|> Seq.map (fun c -> Printf.sprintf "%1x" (int_of_char c))
|> List.of_seq
|> String.concat ""
Expand Down
17 changes: 15 additions & 2 deletions ocaml/libs/uuid/uuidx.mli
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,27 @@
type 'a t

val null : 'a t
(** A null UUID, as if such a thing actually existed. It turns out to be
useful though. *)
(** A null UUID, as defined in RFC 9562 5.9. *)

val make : unit -> 'a t
(** Create a fresh UUID *)

val make_uuid_urnd : unit -> 'a t

val make_v4_uuid : int64 -> int64 -> 'a t
(** For testing only: Create a v4 UUID, as defined in RFC 9562 5.4 *)

val make_v7_uuid_from_parts : int * int64 -> int64 -> int64 -> 'a t
(** For testing only: create a v7 UUID, as defined in RFC 9562 5.7 *)

val make_v7_uuid : unit -> 'a t
(** Create a fresh v7 UUID, as defined in RFC 9562 5.7. This incorporates a
POSIX timestamp, such that the alphabetic of any two such UUIDs will match
the timestamp order - provided that they are at least 245 nanoseconds
apart. Note that in order to ensure that the timestamps used are
monotonic, operating time adjustments are ignored and hence timestamps
only approximate system time. *)

val pp : Format.formatter -> 'a t -> unit

val equal : 'a t -> 'a t -> bool
Expand Down

0 comments on commit 037eef9

Please sign in to comment.