From 6f4fe3597916f6df03a05af6b45c55f1ad1824f0 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 02:06:32 -0300 Subject: [PATCH 01/33] Refactor linear algebra module and remove unused code --- bin/test | 2 +- blas/README.md | 66 +++ .../blas => blas/blas64}/conversions.v | 2 +- {vlas/internal/blas => blas/blas64}/dgemm.v | 2 +- {vlas/internal/blas => blas/blas64}/dgemv.v | 2 +- .../blas => blas/blas64}/dgemv_test.v | 2 +- {vlas/internal/blas => blas/blas64}/error.v | 3 +- .../internal/blas => blas/blas64}/level1f64.v | 2 +- .../blas => blas/blas64}/level1f64_ddot.v | 2 +- .../internal/blas => blas/blas64}/level2f64.v | 2 +- .../internal/blas => blas/blas64}/level3f64.v | 2 +- {vlas/internal/blas => blas/blas64}/util.v | 2 +- {vlas => blas}/cblas.h | 0 .../cflags_d_vsl_blas_cblas.v | 6 +- {vlas => blas}/conversions.v | 8 +- blas/oblas_d_vsl_blas_cblas.v | 448 ++++++++++++++++++ .../oblas_notd_vsl_blas_cblas.v | 41 +- {vlas => blas}/openblas_config.h | 0 blas/v.mod | 8 + la/blas.v | 32 +- la/densesol.v | 6 +- la/matrix_ops.v | 10 +- lapack/README.md | 58 +++ .../cflags_d_vsl_lapack_lapacke copy.v | 3 +- lapack/cflags_notd_vsl_lapack_lapacke.v | 14 + lapack/lapack64/dgesv.v | 56 +++ lapack/lapack64/dgetrf.v | 51 ++ lapack/lapack64/errors.v | 178 +++++++ {vlas => lapack}/lapack_common.v | 27 +- {vlas => lapack}/lapack_default.c.v | 6 +- {vlas => lapack}/lapack_macos.c.v | 2 +- {vlas => lapack}/v.mod | 4 +- vlas/README.md | 83 ---- vlas/oblas_d_vsl_vlas_cblas.v | 448 ------------------ 34 files changed, 963 insertions(+), 615 deletions(-) create mode 100644 blas/README.md rename {vlas/internal/blas => blas/blas64}/conversions.v (95%) rename {vlas/internal/blas => blas/blas64}/dgemm.v (99%) rename {vlas/internal/blas => blas/blas64}/dgemv.v (99%) rename {vlas/internal/blas => blas/blas64}/dgemv_test.v (99%) rename {vlas/internal/blas => blas/blas64}/error.v (98%) rename {vlas/internal/blas => blas/blas64}/level1f64.v (99%) rename {vlas/internal/blas => 
blas/blas64}/level1f64_ddot.v (98%) rename {vlas/internal/blas => blas/blas64}/level2f64.v (99%) rename {vlas/internal/blas => blas/blas64}/level3f64.v (99%) rename {vlas/internal/blas => blas/blas64}/util.v (97%) rename {vlas => blas}/cblas.h (100%) rename vlas/cflags_d_vsl_vlas_cblas.v => blas/cflags_d_vsl_blas_cblas.v (73%) rename {vlas => blas}/conversions.v (98%) create mode 100644 blas/oblas_d_vsl_blas_cblas.v rename vlas/oblas_notd_vsl_vlas_cblas.v => blas/oblas_notd_vsl_blas_cblas.v (53%) rename {vlas => blas}/openblas_config.h (100%) create mode 100644 blas/v.mod create mode 100644 lapack/README.md rename vlas/cflags_notd_vsl_vlas_cblas.v => lapack/cflags_d_vsl_lapack_lapacke copy.v (72%) create mode 100644 lapack/cflags_notd_vsl_lapack_lapacke.v create mode 100644 lapack/lapack64/dgesv.v create mode 100644 lapack/lapack64/dgetrf.v create mode 100644 lapack/lapack64/errors.v rename {vlas => lapack}/lapack_common.v (79%) rename {vlas => lapack}/lapack_default.c.v (54%) rename {vlas => lapack}/lapack_macos.c.v (94%) rename {vlas => lapack}/v.mod (63%) delete mode 100644 vlas/README.md delete mode 100644 vlas/oblas_d_vsl_vlas_cblas.v diff --git a/bin/test b/bin/test index 07dbf064a..d4e1878ed 100755 --- a/bin/test +++ b/bin/test @@ -28,7 +28,7 @@ flags="" if [[ -n "${use_cblas}" ]]; then echo "Running tests using Open BLAS" - flags="${flags} -d vsl_vlas_cblas" + flags="${flags} -d vsl_blas_cblas" fi if [[ -n "${use_autofree}" ]]; then diff --git a/blas/README.md b/blas/README.md new file mode 100644 index 000000000..ac8c441d8 --- /dev/null +++ b/blas/README.md @@ -0,0 +1,66 @@ +# The V Basic Linear Algebra System + +This package implements Basic Linear Algebra System (BLAS) routines in V. 
+ +| Backend | Description | Status | Compilation Flags | +| -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ | ------------------- | +| BLAS | Pure V implementation | Stable | `NONE` | +| OpenBLAS | OpenBLAS is an optimized BLAS library based on GotoBLAS2. Check the section [OpenBLAS Backend](#openblas-backend) for more information. | Stable | `-d vsl_blas_cblas` | + +Therefore, its routines are a little more _low-level_ than the ones in the package `vsl.la`. + +## OpenBLAS Backend + +We provide a backend for the OpenBLAS library. This backend is probably the fastest one for all platforms +but it requires the installation of the OpenBLAS library. + +Use the compilation flag `-d vsl_blas_cblas` to use the OpenBLAS backend +instead of the pure V implementation +and make sure that the OpenBLAS library is installed in your system. + +Check the section below for more information about installing the OpenBLAS library. + +<details>
+Install dependencies + +### Homebrew (macOS) + +```sh +brew install openblas +``` + +### Debian/Ubuntu GNU Linux + +`libopenblas-dev` is not needed when using the pure V backend. + +```sh +sudo apt-get install -y --no-install-recommends \ + gcc \ + gfortran \ + libopenblas-dev +``` + +### Arch Linux/Manjaro GNU Linux + +The best way of installing OpenBlas is using +[lapack-openblas](https://aur.archlinux.org/packages/lapack-openblas/). + +```sh +yay -S lapack-openblas +``` + +or + +```sh +git clone https://aur.archlinux.org/lapack-openblas.git /tmp/lapack-openblas +cd /tmp/lapack-openblas +makepkg -si +``` + +### macOS + +```sh +brew install openblas +``` + +
diff --git a/vlas/internal/blas/conversions.v b/blas/blas64/conversions.v similarity index 95% rename from vlas/internal/blas/conversions.v rename to blas/blas64/conversions.v index 2b4d0d1cc..591e7a2e8 100644 --- a/vlas/internal/blas/conversions.v +++ b/blas/blas64/conversions.v @@ -1,4 +1,4 @@ -module blas +module blas64 pub enum MemoryLayout { row_major = 101 diff --git a/vlas/internal/blas/dgemm.v b/blas/blas64/dgemm.v similarity index 99% rename from vlas/internal/blas/dgemm.v rename to blas/blas64/dgemm.v index 1af9d1337..f7bcfeb5e 100644 --- a/vlas/internal/blas/dgemm.v +++ b/blas/blas64/dgemm.v @@ -1,4 +1,4 @@ -module blas +module blas64 // import runtime import sync diff --git a/vlas/internal/blas/dgemv.v b/blas/blas64/dgemv.v similarity index 99% rename from vlas/internal/blas/dgemv.v rename to blas/blas64/dgemv.v index 94705f79c..5c2510387 100644 --- a/vlas/internal/blas/dgemv.v +++ b/blas/blas64/dgemv.v @@ -1,4 +1,4 @@ -module blas +module blas64 import vsl.float.float64 import math diff --git a/vlas/internal/blas/dgemv_test.v b/blas/blas64/dgemv_test.v similarity index 99% rename from vlas/internal/blas/dgemv_test.v rename to blas/blas64/dgemv_test.v index 4273ea257..3964eddd8 100644 --- a/vlas/internal/blas/dgemv_test.v +++ b/blas/blas64/dgemv_test.v @@ -1,4 +1,4 @@ -module blas +module blas64 fn test_dgemv_no_trans_1() { expected := [0.0, 0, 0, 0, 0] diff --git a/vlas/internal/blas/error.v b/blas/blas64/error.v similarity index 98% rename from vlas/internal/blas/error.v rename to blas/blas64/error.v index 48e99b597..70e5dddda 100644 --- a/vlas/internal/blas/error.v +++ b/blas/blas64/error.v @@ -1,7 +1,8 @@ -module blas +module blas64 // Panic strings used during parameter checks. // This list is duplicated in netlib/blas/netlib. Keep in sync. 
+ pub const zero_incx = 'blas: zero x index increment' pub const zero_incy = 'blas: zero y index increment' diff --git a/vlas/internal/blas/level1f64.v b/blas/blas64/level1f64.v similarity index 99% rename from vlas/internal/blas/level1f64.v rename to blas/blas64/level1f64.v index d42de1895..4d506cee0 100644 --- a/vlas/internal/blas/level1f64.v +++ b/blas/blas64/level1f64.v @@ -1,4 +1,4 @@ -module blas +module blas64 import vsl.float.float64 import math diff --git a/vlas/internal/blas/level1f64_ddot.v b/blas/blas64/level1f64_ddot.v similarity index 98% rename from vlas/internal/blas/level1f64_ddot.v rename to blas/blas64/level1f64_ddot.v index 2f413b178..3fd1310da 100644 --- a/vlas/internal/blas/level1f64_ddot.v +++ b/blas/blas64/level1f64_ddot.v @@ -1,4 +1,4 @@ -module blas +module blas64 import vsl.float.float64 diff --git a/vlas/internal/blas/level2f64.v b/blas/blas64/level2f64.v similarity index 99% rename from vlas/internal/blas/level2f64.v rename to blas/blas64/level2f64.v index 9058f13d6..bee249f55 100644 --- a/vlas/internal/blas/level2f64.v +++ b/blas/blas64/level2f64.v @@ -1,4 +1,4 @@ -module blas +module blas64 import math import vsl.float.float64 diff --git a/vlas/internal/blas/level3f64.v b/blas/blas64/level3f64.v similarity index 99% rename from vlas/internal/blas/level3f64.v rename to blas/blas64/level3f64.v index 0ea444976..e1af75000 100644 --- a/vlas/internal/blas/level3f64.v +++ b/blas/blas64/level3f64.v @@ -1,4 +1,4 @@ -module blas +module blas64 import vsl.float.float64 import math diff --git a/vlas/internal/blas/util.v b/blas/blas64/util.v similarity index 97% rename from vlas/internal/blas/util.v rename to blas/blas64/util.v index 28e0e9a78..41837e456 100644 --- a/vlas/internal/blas/util.v +++ b/blas/blas64/util.v @@ -1,4 +1,4 @@ -module blas +module blas64 // [SD]gemm behavior constants. These are kept here to keep them out of the // way during single precision code genration. 
diff --git a/vlas/cblas.h b/blas/cblas.h similarity index 100% rename from vlas/cblas.h rename to blas/cblas.h diff --git a/vlas/cflags_d_vsl_vlas_cblas.v b/blas/cflags_d_vsl_blas_cblas.v similarity index 73% rename from vlas/cflags_d_vsl_vlas_cblas.v rename to blas/cflags_d_vsl_blas_cblas.v index 0c3432256..6d038411c 100644 --- a/vlas/cflags_d_vsl_vlas_cblas.v +++ b/blas/cflags_d_vsl_blas_cblas.v @@ -1,4 +1,4 @@ -module vlas +module blas #flag linux -O2 -I/usr/local/include -I/usr/lib #flag linux -L/usr/local/lib -L/usr/lib @@ -7,11 +7,9 @@ module vlas // Intel, M1 brew, and MacPorts #flag darwin -I/usr/local/opt/openblas/include -I/opt/homebrew/opt/openblas/include -I/opt/local/opt/openblas/include #flag darwin -L/usr/local/opt/openblas/lib -L/opt/homebrew/opt/openblas/lib -L/opt/local/opt/openblas/lib -#flag darwin -L/usr/local/opt/lapack/lib -L/opt/homebrew/opt/lapack/lib -L/opt/local/opt/lapack/lib #flag -I@VMODROOT -#flag -lopenblas -llapacke +#flag -lopenblas $if macos { - #include <lapacke.h> #include <cblas.h> } diff --git a/vlas/conversions.v b/blas/conversions.v similarity index 98% rename from vlas/conversions.v rename to blas/conversions.v index c3a097d9f..154edade5 100644 --- a/vlas/conversions.v +++ b/blas/conversions.v @@ -1,16 +1,16 @@ -module vlas +module blas import strconv import math import math.complex import vsl.errors -import vsl.vlas.internal.blas +import vsl.blas.blas64 -pub fn c_trans(trans bool) blas.Transpose { +pub fn c_trans(trans bool) blas64.Transpose { return if trans { .trans } else { .no_trans } } -pub fn c_uplo(up bool) blas.Uplo { +pub fn c_uplo(up bool) blas64.Uplo { return if up { .upper } else { .lower } } diff --git a/blas/oblas_d_vsl_blas_cblas.v b/blas/oblas_d_vsl_blas_cblas.v new file mode 100644 index 000000000..b4483c995 --- /dev/null +++ b/blas/oblas_d_vsl_blas_cblas.v @@ -0,0 +1,448 @@ +module blas + +import vsl.blas.blas64 + +fn C.openblas_set_num_threads(n int) + +fn C.cblas_sdsdot(n int, alpha f32, x &f32, incx int, y &f32, incy int)
f32 +fn C.cblas_dsdot(n int, x &f32, incx int, y &f32, incy int) f64 +fn C.cblas_sdot(n int, x &f32, incx int, y &f32, incy int) f32 +fn C.cblas_ddot(n int, x &f64, incx int, y &f64, incy int) f64 +fn C.cblas_cdotu(n int, x voidptr, incx int, y voidptr, incy int) f32 +fn C.cblas_cdotc(n int, x voidptr, incx int, y voidptr, incy int) f32 +fn C.cblas_zdotu(n int, x voidptr, incx int, y voidptr, incy int) f64 +fn C.cblas_zdotc(n int, x voidptr, incx int, y voidptr, incy int) f64 +fn C.cblas_cdotu_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) +fn C.cblas_cdotc_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) +fn C.cblas_zdotu_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) +fn C.cblas_zdotc_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) +fn C.cblas_sasum(n int, x &f32, incx int) f32 +fn C.cblas_dasum(n int, x &f64, incx int) f64 +fn C.cblas_scasum(n int, x voidptr, incx int) f32 +fn C.cblas_dzasum(n int, x voidptr, incx int) f64 +fn C.cblas_ssum(n int, x &f32, incx int) f32 +fn C.cblas_dsum(n int, x &f64, incx int) f64 +fn C.cblas_scsum(n int, x voidptr, incx int) f32 +fn C.cblas_dzsum(n int, x voidptr, incx int) f64 +fn C.cblas_snrm2(n int, x &f32, incx int) f32 +fn C.cblas_dnrm2(n int, x &f64, incx int) f64 +fn C.cblas_scnrm2(n int, x voidptr, incx int) f32 +fn C.cblas_dznrm2(n int, x voidptr, incx int) f64 + +fn C.cblas_isamax(n int, x &f32, incx int) int +fn C.cblas_idamax(n int, x &f64, incx int) int +fn C.cblas_icamax(n int, x voidptr, incx int) int +fn C.cblas_izamax(n int, x voidptr, incx int) int +fn C.cblas_isamin(n int, x &f32, incx int) int +fn C.cblas_idamin(n int, x &f64, incx int) int +fn C.cblas_icamin(n int, x voidptr, incx int) int +fn C.cblas_izamin(n int, x voidptr, incx int) int +fn C.cblas_ismax(n int, x &f32, incx int) int +fn C.cblas_idmax(n int, x &f64, incx int) int +fn C.cblas_icmax(n int, x voidptr, incx int) int +fn C.cblas_izmax(n int, x voidptr, incx int) int +fn 
C.cblas_ismin(n int, x &f32, incx int) int +fn C.cblas_idmin(n int, x &f64, incx int) int +fn C.cblas_icmin(n int, x voidptr, incx int) int +fn C.cblas_izmin(n int, x voidptr, incx int) int +fn C.cblas_saxpy(n int, alpha f32, x &f32, incx int, y &f32, incy int) +fn C.cblas_daxpy(n int, alpha f64, x &f64, incx int, y &f64, incy int) +fn C.cblas_caxpy(n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_zaxpy(n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_scopy(n int, x &f32, incx int, y &f32, incy int) +fn C.cblas_dcopy(n int, x &f64, incx int, y &f64, incy int) +fn C.cblas_ccopy(n int, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_zcopy(n int, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_sswap(n int, x &f32, incx int, y &f32, incy int) +fn C.cblas_dswap(n int, x &f64, incx int, y &f64, incy int) +fn C.cblas_cswap(n int, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_zswap(n int, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_srot(n int, x &f32, incx int, y &f32, incy int, c f32, s f32) +fn C.cblas_drot(n int, x &f64, incx int, y &f64, incy int, c f64, s f64) +fn C.cblas_srotg(a &f32, b &f32, c &f32, s &f32) +fn C.cblas_drotg(a &f64, b &f64, c &f64, s &f64) +fn C.cblas_srotm(n int, x &f32, incx int, y &f32, incy int, p &f32) +fn C.cblas_drotm(n int, x &f64, incx int, y &f64, incy int, p &f64) +fn C.cblas_srotmg(d1 &f32, d2 &f32, b1 &f32, b2 f32, p &f32) +fn C.cblas_drotmg(d1 &f64, d2 &f64, b1 &f64, b2 f64, p &f64) +fn C.cblas_sscal(n int, alpha f32, x &f32, incx int) +fn C.cblas_dscal(n int, alpha f64, x &f64, incx int) +fn C.cblas_cscal(n int, alpha voidptr, x voidptr, incx int) +fn C.cblas_zscal(n int, alpha voidptr, x voidptr, incx int) +fn C.cblas_csscal(n int, alpha f32, x voidptr, incx int) +fn C.cblas_zdscal(n int, alpha f64, x voidptr, incx int) +fn C.cblas_sgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta 
f32, y &f32, incy int) +fn C.cblas_dgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_cgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sger(order blas64.MemoryLayout, m int, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) +fn C.cblas_dger(order blas64.MemoryLayout, m int, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) +fn C.cblas_cgeru(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_cgerc(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zgeru(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zgerc(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_strsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_strmv(order blas64.MemoryLayout, uplo blas64.Uplo, 
transA blas64.Transpose, diag blas64.Diagonal, n int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ssyr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, a &f32, lda int) +fn C.cblas_dsyr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, a &f64, lda int) +fn C.cblas_cher(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr, lda int) +fn C.cblas_zher(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr, lda int) +fn C.cblas_ssyr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) +fn C.cblas_dsyr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) +fn C.cblas_cher2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zher2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_sgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_cgbmv(order blas64.MemoryLayout, transA 
blas64.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_ssbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dsbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_stbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_stbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_stpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA 
blas64.Transpose, diag blas64.Diagonal, n int, ap &f32, x &f32, incx int) +fn C.cblas_dtpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f64, x &f64, incx int) +fn C.cblas_ctpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_ztpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_stpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f32, x &f32, incx int) +fn C.cblas_dtpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f64, x &f64, incx int) +fn C.cblas_ctpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_ztpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_ssymv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dsymv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_chemv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhemv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sspmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, ap &f32, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dspmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, ap &f64, x &f64, incx int, beta f64, y &f64, incy int) +fn 
C.cblas_sspr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, ap &f32) +fn C.cblas_dspr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, ap &f64) +fn C.cblas_chpr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr) +fn C.cblas_zhpr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr) +fn C.cblas_sspr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32) +fn C.cblas_dspr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64) +fn C.cblas_chpr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) +fn C.cblas_zhpr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) +fn C.cblas_chbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_chpmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhpmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn 
C.cblas_cgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_cgemm3m(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zgemm3m(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dsymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn C.cblas_csymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zsymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a &f32, lda int, beta f32, c &f32, ldc int) +fn C.cblas_dsyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f64, a &f64, lda int, beta f64, c &f64, ldc int) +fn C.cblas_csyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc 
int) +fn C.cblas_zsyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dsyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn C.cblas_csyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zsyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_strmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) +fn C.cblas_dtrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) +fn C.cblas_ctrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_ztrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_strsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) +fn C.cblas_dtrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n 
int, alpha f64, a &f64, lda int, b &f64, ldb int) +fn C.cblas_ctrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_ztrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_chemm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zhemm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_cherk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a voidptr, lda int, beta f32, c voidptr, ldc int) +fn C.cblas_zherk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f64, a voidptr, lda int, beta f64, c voidptr, ldc int) +fn C.cblas_cher2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f32, c voidptr, ldc int) +fn C.cblas_zher2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f64, c voidptr, ldc int) +fn C.cblas_xerbla(p int, rout &byte, form &byte, other voidptr) + +fn C.cblas_saxpby(n int, alpha f32, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_daxpby(n int, alpha f64, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_caxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zaxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_somatcopy(corder blas64.MemoryLayout, ctrans 
blas64.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, b &f32, cldb int) +fn C.cblas_domatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f64, a &f64, clda int, b &f64, cldb int) +fn C.cblas_comatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, b &f32, cldb int) +fn C.cblas_zomatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, b &f64, cldb int) +fn C.cblas_simatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, cldb int) +fn C.cblas_dimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f64, a &f64, clda int, cldb int) +fn C.cblas_cimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, cldb int) +fn C.cblas_zimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, cldb int) +fn C.cblas_sgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha f32, a &f32, clda int, cbeta f32, c &f32, cldc int) +fn C.cblas_dgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha f64, a &f64, clda int, cbeta f64, c &f64, cldc int) +fn C.cblas_cgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha &f32, a &f32, clda int, cbeta &f32, c &f32, cldc int) +fn C.cblas_zgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha &f64, a &f64, clda int, cbeta &f64, c &f64, cldc int) + +// set_num_threads sets the number of threads in OpenBLAS +pub fn set_num_threads(n int) { + C.openblas_set_num_threads(n) +} + +@[inline] +pub fn sdsdot(n int, alpha f32, x []f32, incx int, y []f32, incy int) f32 { + return C.cblas_sdsdot(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dsdot(n int, x []f32, incx int, y []f32, incy int) f64 { + return C.cblas_dsdot(n, 
unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn sdot(n int, x []f32, incx int, y []f32, incy int) f32 { + return C.cblas_sdot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn ddot(n int, x []f64, incx int, y []f64, incy int) f64 { + return C.cblas_ddot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn sasum(n int, x []f32, incx int) f32 { + return C.cblas_sasum(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dasum(n int, x []f64, incx int) f64 { + return C.cblas_dasum(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ssum(n int, x []f32, incx int) f32 { + return C.cblas_ssum(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dsum(n int, x []f64, incx int) f64 { + return C.cblas_dsum(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn snrm2(n int, x []f32, incx int) f32 { + return C.cblas_snrm2(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dnrm2(n int, x []f64, incx int) f64 { + return C.cblas_dnrm2(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn isamax(n int, x []f32, incx int) int { + return C.cblas_isamax(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn idamax(n int, x []f64, incx int) int { + return C.cblas_idamax(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn isamin(n int, x []f32, incx int) int { + return C.cblas_isamin(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn idamin(n int, x &f64, incx int) int { + return C.cblas_idamin(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ismax(n int, x []f32, incx int) int { + return C.cblas_ismax(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn idmax(n int, x []f64, incx int) int { + return C.cblas_idmax(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ismin(n int, x []f32, incx int) int { + return C.cblas_ismin(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn idmin(n int, x []f64, incx int) int { + return C.cblas_idmin(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn saxpy(n int, alpha f32, x []f32, incx int, mut y 
[]f32, incy int) { + C.cblas_saxpy(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn daxpy(n int, alpha f64, x []f64, incx int, mut y []f64, incy int) { + C.cblas_daxpy(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn scopy(n int, mut x []f32, incx int, mut y []f32, incy int) { + C.cblas_scopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dcopy(n int, mut x []f64, incx int, mut y []f64, incy int) { + C.cblas_dcopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn sswap(n int, mut x []f32, incx int, mut y []f32, incy int) { + C.cblas_sswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dswap(n int, mut x []f64, incx int, mut y []f64, incy int) { + C.cblas_dswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn srot(n int, mut x []f32, incx int, mut y []f32, incy int, c f32, s f32) { + C.cblas_srot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, c, s) +} + +@[inline] +pub fn drot(n int, mut x []f64, incx int, mut y []f64, incy int, c f64, s f64) { + C.cblas_drot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, c, s) +} + +@[inline] +pub fn srotg(a f32, b f32, c f32, s f32) { + C.cblas_srotg(&a, &b, &c, &s) +} + +@[inline] +pub fn drotg(a f64, b f64, c f64, s f64) { + C.cblas_drotg(&a, &b, &c, &s) +} + +@[inline] +pub fn srotm(n int, x []f32, incx int, y []f32, incy int, p []f32) { + C.cblas_srotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) +} + +@[inline] +pub fn drotm(n int, x []f64, incx int, y []f64, incy int, p []f64) { + C.cblas_drotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) +} + +@[inline] +pub fn srotmg(d1 f32, d2 f32, b1 f32, b2 f32, p []f32) { + C.cblas_srotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) +} + +@[inline] +pub fn drotmg(d1 f64, d2 f64, b1 f64, b2 f32, p []f64) { + C.cblas_drotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) +} + 
+@[inline] +pub fn sscal(n int, alpha f32, mut x []f32, incx int) { + C.cblas_sscal(n, alpha, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dscal(n int, alpha f64, mut x []f64, incx int) { + C.cblas_dscal(n, alpha, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn sgemv(trans bool, m int, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn sger(m int, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { + C.cblas_sger(.row_major, m, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, + unsafe { &a[0] }, lda) +} + +@[inline] +pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { + C.cblas_dger(.row_major, m, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, + unsafe { &a[0] }, lda) +} + +@[inline] +pub fn strsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dtrsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn strmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] 
+pub fn dtrmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ssyr(uplo bool, n int, alpha f32, x []f32, incx int, mut a []f32, lda int) { + C.cblas_ssyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, + lda) +} + +@[inline] +pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { + C.cblas_dsyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, + lda) +} + +@[inline] +pub fn ssyr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { + C.cblas_ssyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, + incy, unsafe { &a[0] }, lda) +} + +@[inline] +pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { + C.cblas_dsyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, + incy, unsafe { &a[0] }, lda) +} + +@[inline] +pub fn sgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut cc []f32, ldc int) { + C.cblas_sgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) +} + +@[inline] +pub fn dgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { + C.cblas_dgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) +} diff --git a/vlas/oblas_notd_vsl_vlas_cblas.v b/blas/oblas_notd_vsl_blas_cblas.v similarity index 53% rename from vlas/oblas_notd_vsl_vlas_cblas.v rename to blas/oblas_notd_vsl_blas_cblas.v index ae4ff46d9..763995224 100644 --- 
a/vlas/oblas_notd_vsl_vlas_cblas.v +++ b/blas/oblas_notd_vsl_blas_cblas.v @@ -1,82 +1,83 @@ -module vlas +module blas -import vsl.vlas.internal.blas +import vsl.blas.blas64 -// set_num_threads sets the number of threads in VLAS +// set_num_threads sets the number of threads in BLAS +@[inline] pub fn set_num_threads(n int) {} @[inline] pub fn ddot(n int, x []f64, incx int, y []f64, incy int) f64 { - return blas.ddot(n, x, incx, y, incy) + return blas64.ddot(n, x, incx, y, incy) } @[inline] pub fn dasum(n int, x []f64, incx int) f64 { - return blas.dasum(n, x, incx) + return blas64.dasum(n, x, incx) } @[inline] pub fn dnrm2(n int, x []f64, incx int) f64 { - return blas.dnrm2(n, x, incx) + return blas64.dnrm2(n, x, incx) } @[inline] pub fn daxpy(n int, alpha f64, x []f64, incx int, mut y []f64, incy int) { - blas.daxpy(n, alpha, x, incx, mut y, incy) + blas64.daxpy(n, alpha, x, incx, mut y, incy) } @[inline] pub fn dcopy(n int, x []f64, incx int, mut y []f64, incy int) { - blas.dcopy(n, x, incx, mut y, incy) + blas64.dcopy(n, x, incx, mut y, incy) } @[inline] pub fn dswap(n int, mut x []f64, incx int, mut y []f64, incy int) { - blas.dswap(n, mut x, incx, mut y, incy) + blas64.dswap(n, mut x, incx, mut y, incy) } @[inline] pub fn drot(n int, mut x []f64, incx int, mut y []f64, incy int, c f64, s f64) { - blas.drot(n, mut x, incx, mut y, incy, c, s) + blas64.drot(n, mut x, incx, mut y, incy, c, s) } @[inline] pub fn dscal(n int, alpha f64, mut x []f64, incx int) { - blas.dscal(n, alpha, mut x, incx) + blas64.dscal(n, alpha, mut x, incx) } @[inline] pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - blas.dgemv(c_trans(trans), m, n, alpha, a, lda, x, incx, beta, mut y, incy) + blas64.dgemv(c_trans(trans), m, n, alpha, a, lda, x, incx, beta, mut y, incy) } @[inline] pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - blas.dger(m, n, alpha, x, incx, y, 
incy, mut a, lda) + blas64.dger(m, n, alpha, x, incx, y, incy, mut a, lda) } @[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - blas.dtrsv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) +pub fn dtrsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtrsv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) } @[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - blas.dtrmv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) +pub fn dtrmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtrmv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) } @[inline] pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { - blas.dsyr(c_uplo(uplo), n, alpha, x, incx, mut a, lda) + blas64.dsyr(c_uplo(uplo), n, alpha, x, incx, mut a, lda) } @[inline] pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - blas.dsyr2(c_uplo(uplo), n, alpha, x, incx, y, incy, mut a, lda) + blas64.dsyr2(c_uplo(uplo), n, alpha, x, incx, y, incy, mut a, lda) } @[inline] pub fn dgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { - blas.dgemm(c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, a, lda, b, ldb, beta, mut + blas64.dgemm(c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, a, lda, b, ldb, beta, mut cc, ldc) } diff --git a/vlas/openblas_config.h b/blas/openblas_config.h similarity index 100% rename from vlas/openblas_config.h rename to blas/openblas_config.h diff --git a/blas/v.mod b/blas/v.mod new file mode 100644 index 000000000..0e946714d --- /dev/null +++ b/blas/v.mod @@ -0,0 +1,8 @@ +Module { + name: 'blas' + description: 'The V Basic 
Linear Algebra System' + version: '0.1.0' + license: 'MIT' + repo_url: 'https://github.com/vlang/vsl' + dependencies: [] +} diff --git a/la/blas.v b/la/blas.v index 53514a776..630ab49f6 100644 --- a/la/blas.v +++ b/la/blas.v @@ -1,6 +1,6 @@ module la -import vsl.vlas +import vsl.blas import math // TODO: @ulises-jeremias to remove this once https://github.com/vlang/v/issues/14047 is finished @@ -47,7 +47,7 @@ pub fn vector_dot[T](u []T, v []T) T { } return res } - return vlas.ddot(u.len, arr_to_f64arr[T](u), 1, arr_to_f64arr[T](v), 1) + return blas.ddot(u.len, arr_to_f64arr[T](u), 1, arr_to_f64arr[T](v), 1) } $else { mut res := T{} for i in 0 .. u.len { @@ -66,7 +66,7 @@ pub fn vector_add[T](alpha T, u []T, beta T, v []T) []T { cutoff := 150 if beta == 1 && n > cutoff { res = v.clone() - vlas.daxpy(n, alpha, arr_to_f64arr(u), 1, mut res, 1) + blas.daxpy(n, alpha, arr_to_f64arr(u), 1, mut res, 1) return res } m := n % 4 @@ -136,7 +136,7 @@ pub fn matrix_vector_mul[T](alpha T, a &Matrix[T], u []T) []T { } return v } - vlas.dgemv(false, a.m, a.n, alpha, arr_to_f64arr[T](a.data), a.m, arr_to_f64arr[T](u), + blas.dgemv(false, a.m, a.n, alpha, arr_to_f64arr[T](a.data), a.m, arr_to_f64arr[T](u), 1, 0.0, mut v, v.len) return v } $else { @@ -167,7 +167,7 @@ pub fn matrix_tr_vector_mul[T](alpha T, a &Matrix[T], u []T) []T { } return v } - vlas.dgemv(true, a.m, a.n, alpha, arr_to_f64arr[T](a.data), a.n, arr_to_f64arr[T](u), + blas.dgemv(true, a.m, a.n, alpha, arr_to_f64arr[T](a.data), a.n, arr_to_f64arr[T](u), 1, 0.0, mut v, v.len) return v } $else { @@ -199,7 +199,7 @@ pub fn vector_vector_tr_mul[T](alpha T, u []T, v []T) &Matrix[T] { return m } mut a := []f64{len: u.len * v.len} - vlas.dger(m.m, m.n, alpha, arr_to_f64arr[T](u), 1, arr_to_f64arr[T](v), 1, mut + blas.dger(m.m, m.n, alpha, arr_to_f64arr[T](u), 1, arr_to_f64arr[T](v), 1, mut a, int(math.max(m.m, m.n))) return Matrix.raw(u.len, v.len, a) } $else { @@ -220,7 +220,7 @@ pub fn vector_vector_tr_mul[T](alpha T, u 
[]T, v []T) &Matrix[T] { // pub fn matrix_vector_mul_add(alpha f64, a &Matrix[f64], u []f64) []f64 { mut v := []f64{len: a.m} - vlas.dgemv(false, a.m, a.n, alpha, a.data, a.m, u, 1, 1.0, mut v, v.len) + blas.dgemv(false, a.m, a.n, alpha, a.data, a.m, u, 1, 1.0, mut v, v.len) return v } @@ -240,7 +240,7 @@ pub fn matrix_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix } return } - vlas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.m, b.data, b.m, 0.0, mut + blas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.m, b.data, b.m, 0.0, mut c.data, c.m) } @@ -260,7 +260,7 @@ pub fn matrix_tr_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat } return } - vlas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, + blas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, c.m) } @@ -269,7 +269,7 @@ pub fn matrix_tr_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c := alpha⋅a⋅bᵀ ⇒ cij := alpha * aik * bjk // pub fn matrix_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - vlas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, + blas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, c.m) } @@ -278,7 +278,7 @@ pub fn matrix_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c := alpha⋅aᵀ⋅bᵀ ⇒ cij := alpha * aki * bjk // pub fn matrix_tr_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - vlas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, + blas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, c.m) } @@ -287,7 +287,7 @@ pub fn matrix_tr_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅a⋅b ⇒ cij += alpha * aik * bkj // pub fn matrix_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - 
vlas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut + blas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, c.m) } @@ -296,7 +296,7 @@ pub fn matrix_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c += alpha⋅aᵀ⋅b ⇒ cij += alpha * aki * bkj // pub fn matrix_tr_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - vlas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, + blas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, c.m) } @@ -305,7 +305,7 @@ pub fn matrix_tr_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅a⋅bᵀ ⇒ cij += alpha * aik * bjk // pub fn matrix_matrix_tr_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - vlas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, + blas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, c.m) } @@ -314,7 +314,7 @@ pub fn matrix_matrix_tr_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅aᵀ⋅bᵀ ⇒ cij += alpha * aki * bjk // pub fn matrix_tr_matrix_tr_mul_add(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - vlas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, + blas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, c.m) } @@ -325,7 +325,7 @@ pub fn matrix_add(mut res Matrix[f64], alpha f64, a &Matrix[f64], beta f64, b &M cutoff := 150 if beta == 1 && n > cutoff { res.data = b.data.clone() - vlas.daxpy(n, alpha, a.data, 1, mut res.data, 1) + blas.daxpy(n, alpha, a.data, 1, mut res.data, 1) return } m := n % 4 diff --git a/la/densesol.v b/la/densesol.v index 8f8a14b21..10395a902 100644 --- a/la/densesol.v +++ b/la/densesol.v @@ -1,8 +1,8 @@ module la -import vsl.vlas +import vsl.lapack -// den_solve solves dense linear system 
using LAPACK (OpenBLaS) +// den_solve solves dense linear system using LAPACK // // Given: a ⋅ x = b find x such that x = a⁻¹ ⋅ b // @@ -16,5 +16,5 @@ pub fn den_solve(mut x []f64, a &Matrix[f64], b []f64, preserve_a bool) { x[i] = b[i] } ipiv := []int{len: a_.m} - vlas.dgesv(a_.m, 1, mut a_.data, a_.m, ipiv, mut x, 1) + lapack.dgesv(a_.m, 1, mut a_.data, a_.m, ipiv, mut x, 1) } diff --git a/la/matrix_ops.v b/la/matrix_ops.v index 9e8cfa1f8..74dc6ea28 100644 --- a/la/matrix_ops.v +++ b/la/matrix_ops.v @@ -1,7 +1,7 @@ module la import vsl.errors -import vsl.vlas +import vsl.lapack import math // det computes the determinant of matrix using the LU factorization @@ -13,7 +13,7 @@ pub fn matrix_det(o &Matrix[f64]) f64 { } mut ai := o.data.clone() ipiv := []int{len: int(math.min(o.m, o.n))} - vlas.dgetrf(o.m, o.n, mut ai, o.m, ipiv) // NOTE: ipiv are 1-based indices + lapack.dgetrf(o.m, o.n, mut ai, o.m, ipiv) // NOTE: ipiv are 1-based indices mut det := 1.0 for i in 0 .. o.m { if ipiv[i] - 1 == i { // NOTE: ipiv are 1-based indices @@ -90,7 +90,7 @@ pub fn matrix_svd(mut s []f64, mut u Matrix[f64], mut vt Matrix[f64], mut a Matr if copy_a { acpy = a.clone() } - vlas.dgesvd(&char('A'.str), &char('A'.str), a.m, a.n, acpy.data, 1, s, u.data, a.m, + lapack.dgesvd(&char('A'.str), &char('A'.str), a.m, a.n, acpy.data, 1, s, u.data, a.m, vt.data, a.n, superb) } @@ -108,7 +108,7 @@ pub fn matrix_inv(mut ai Matrix[f64], mut a Matrix[f64], calc_det bool) f64 { if a.m == a.n { ai.data = a.data.clone() ipiv := []int{len: int(math.min(a.m, a.n))} - vlas.dgetrf(a.m, a.n, mut ai.data, a.m, ipiv) // NOTE: ipiv are 1-based indices + lapack.dgetrf(a.m, a.n, mut ai.data, a.m, ipiv) // NOTE: ipiv are 1-based indices if calc_det { det = 1.0 for i := 0; i < a.m; i++ { @@ -119,7 +119,7 @@ pub fn matrix_inv(mut ai Matrix[f64], mut a Matrix[f64], calc_det bool) f64 { } } } - vlas.dgetri(a.n, mut ai.data, a.m, ipiv) + lapack.dgetri(a.n, mut ai.data, a.m, ipiv) return det } // singular value 
decomposition diff --git a/lapack/README.md b/lapack/README.md new file mode 100644 index 000000000..fccedfdfd --- /dev/null +++ b/lapack/README.md @@ -0,0 +1,58 @@ +# The V Linear Algebra Package + +This package implements Linear Algebra routines in V. + +| Backend | Description | Status | Compilation Flags | +| -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ | ------------------- | +| BLAS | Pure V implementation | Stable | `NONE` | +| LAPACKE | LAPACKE is a C interface to LAPACK. It is a standard part of the LAPACK distribution. Check the section [LAPACKE Backend](#lapacke-backend) for more information. | Stable | `-d vsl_lapack_lapacke` | + +Therefore, its routines are a little more _lower level_ than the ones in the package `vsl.la`. + +## LAPACKE Backend + +We provide a backend for the LAPACKE library. This backend is probably the fastest one for all platforms +but it requires the installation of the LAPACKE library. + +Use the compilation flag `-d vsl_lapack_lapacke` to use the LAPACKE backend +instead of the pure V implementation +and make sure that the LAPACKE library is installed in your system. + +Check the section below for more information about installing the LAPACKE library. + +
+Install dependencies + +### Homebrew (macOS) + +```sh +brew install lapack +``` + +### Debian/Ubuntu GNU Linux + +```sh +sudo apt-get install -y --no-install-recommends \ + gcc \ + gfortran \ + liblapacke-dev +``` + +### Arch Linux/Manjaro GNU Linux + +The best way of installing LAPACKE is using +[lapack-openblas](https://aur.archlinux.org/packages/lapack-openblas/). + +```sh +yay -S lapack-openblas +``` + +or + +```sh +git clone https://aur.archlinux.org/lapack-openblas.git /tmp/lapack-openblas +cd /tmp/lapack-openblas +makepkg -si +``` + +
diff --git a/vlas/cflags_notd_vsl_vlas_cblas.v b/lapack/cflags_d_vsl_lapack_lapacke copy.v similarity index 72% rename from vlas/cflags_notd_vsl_vlas_cblas.v rename to lapack/cflags_d_vsl_lapack_lapacke copy.v index a25a3ec30..081f788db 100644 --- a/vlas/cflags_notd_vsl_vlas_cblas.v +++ b/lapack/cflags_d_vsl_lapack_lapacke copy.v @@ -1,11 +1,10 @@ -module vlas +module lapack #flag linux -O2 -I/usr/local/include -I/usr/lib #flag linux -L/usr/local/lib -L/usr/lib #flag windows -O2 #flag windows -lgfortran // Intel, M1 brew, and MacPorts -#flag darwin -I/usr/local/opt/lapack/include -I/opt/homebrew/opt/lapack/include -I/opt/local/opt/lapack/include #flag darwin -L/usr/local/opt/lapack/lib -L/opt/homebrew/opt/lapack/lib -L/opt/local/opt/lapack/lib #flag -I@VMODROOT #flag -llapacke diff --git a/lapack/cflags_notd_vsl_lapack_lapacke.v b/lapack/cflags_notd_vsl_lapack_lapacke.v new file mode 100644 index 000000000..081f788db --- /dev/null +++ b/lapack/cflags_notd_vsl_lapack_lapacke.v @@ -0,0 +1,14 @@ +module lapack + +#flag linux -O2 -I/usr/local/include -I/usr/lib +#flag linux -L/usr/local/lib -L/usr/lib +#flag windows -O2 +#flag windows -lgfortran +// Intel, M1 brew, and MacPorts +#flag darwin -L/usr/local/opt/lapack/lib -L/opt/homebrew/opt/lapack/lib -L/opt/local/opt/lapack/lib +#flag -I@VMODROOT +#flag -llapacke + +$if macos { + #include +} diff --git a/lapack/lapack64/dgesv.v b/lapack/lapack64/dgesv.v new file mode 100644 index 000000000..dc8b61bdf --- /dev/null +++ b/lapack/lapack64/dgesv.v @@ -0,0 +1,56 @@ +module lapack + +import math +import vsl.blas.blas64 + +// dgesv computes the solution to a real system of linear equations +// +// A * X = B +// +// where A is an n×n matrix and X and B are n×nrhs matrices. +// +// The LU decomposition with partial pivoting and row interchanges is used to +// factor A as +// +// A = P * L * U +// +// where P is a permutation matrix, L is unit lower triangular, and U is upper +// triangular. 
On return, the factors L and U are stored in a; the unit diagonal +// elements of L are not stored. The row pivot indices that define the +// permutation matrix P are stored in ipiv. +// +// The factored form of A is then used to solve the system of equations A * X = +// B. On entry, b contains the right hand side matrix B. On return, if ok is +// true, b contains the solution matrix X. +pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { + if n < 0 { + panic(n_lt0) + } + if nrhs < 0 { + panic(nrhs_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if ldb < math.max(1, n) { + panic(bad_ld_b) + } + + // Quick return if possible. + if n == 0 || nrhs == 0 { + return + } + + if a.len < (n - 1) * lda + n { + panic(short_ab) + } + if ipiv.len < n { + panic(bad_len_ipiv) + } + if b.len < (n - 1) * ldb + nrhs { + panic(short_b) + } + + dgetrf(n, n, mut a, lda, ipiv) + dgetrs(.no_trans, n, nrhs, mut a, lda, ipiv, mut b, ldb) +} diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v new file mode 100644 index 000000000..54e383eac --- /dev/null +++ b/lapack/lapack64/dgetrf.v @@ -0,0 +1,51 @@ +module lapack + +import math +import vsl.blas.blas64 + +// dgetrf computes the LU decomposition of an m×n matrix A using partial +// pivoting with row interchanges. +// +// The LU decomposition is a factorization of A into +// +// A = P * L * U +// +// where P is a permutation matrix, L is a lower triangular with unit diagonal +// elements (lower trapezoidal if m > n), and U is upper triangular (upper +// trapezoidal if m < n). +// +// On entry, a contains the matrix A. On return, L and U are stored in place +// into a, and P is represented by ipiv. +// +// ipiv contains a sequence of row interchanges. It indicates that row i of the +// matrix was interchanged with ipiv[i]. ipiv must have length min(m,n), and +// Dgetrf will panic otherwise. ipiv is zero-indexed. +// +// Dgetrf returns whether the matrix A is nonsingular. 
The LU decomposition will +// be computed regardless of the singularity of A, but the result should not be +// used to solve a system of equation. +pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { + mn := math.min(m, n) + + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_lda) + } + + // quick return if possible + if mn == 0 { + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + if ipiv.len < mn { + panic(bad_len_ipiv) + } +} diff --git a/lapack/lapack64/errors.v b/lapack/lapack64/errors.v new file mode 100644 index 000000000..d17defc08 --- /dev/null +++ b/lapack/lapack64/errors.v @@ -0,0 +1,178 @@ +module lapack + +// This list is duplicated in netlib/lapack/netlib. Keep in sync. + +// Panic strings for bad enumeration values. +pub const bad_apply_ortho = 'lapack: bad ApplyOrtho' +pub const bad_balance_job = 'lapack: bad BalanceJob' +pub const bad_diag = 'lapack: bad Diag' +pub const bad_direct = 'lapack: bad Direct' +pub const bad_ev_comp = 'lapack: bad EVComp' +pub const bad_ev_how_many = 'lapack: bad EVHowMany' +pub const bad_ev_job = 'lapack: bad EVJob' +pub const bad_ev_side = 'lapack: bad EVSide' +pub const bad_gsvd_job = 'lapack: bad GSVDJob' +pub const bad_gen_ortho = 'lapack: bad GenOrtho' +pub const bad_left_ev_job = 'lapack: bad LeftEVJob' +pub const bad_matrix_type = 'lapack: bad MatrixType' +pub const bad_maximize_norm_x_job = 'lapack: bad MaximizeNormXJob' +pub const bad_norm = 'lapack: bad Norm' +pub const bad_ortho_comp = 'lapack: bad OrthoComp' +pub const bad_pivot = 'lapack: bad Pivot' +pub const bad_right_ev_job = 'lapack: bad RightEVJob' +pub const bad_svd_job = 'lapack: bad SVDJob' +pub const bad_schur_comp = 'lapack: bad SchurComp' +pub const bad_schur_job = 'lapack: bad SchurJob' +pub const bad_side = 'lapack: bad Side' +pub const bad_sort = 'lapack: bad Sort' +pub const bad_store_v = 'lapack: bad StoreV' +pub const bad_trans = 'lapack: bad Trans' +pub 
const bad_update_schur_comp = 'lapack: bad UpdateSchurComp' +pub const bad_uplo = 'lapack: bad Uplo' +pub const both_svd_over = 'lapack: both jobU and jobVT are lapack.SVDOverwrite' + +// Panic strings for bad numerical and string values. +pub const bad_ifst = 'lapack: ifst out of range' +pub const bad_ihi = 'lapack: ihi out of range' +pub const bad_ihiz = 'lapack: ihiz out of range' +pub const bad_ilo = 'lapack: ilo out of range' +pub const bad_iloz = 'lapack: iloz out of range' +pub const bad_ilst = 'lapack: ilst out of range' +pub const bad_isave = 'lapack: bad isave value' +pub const bad_ispec = 'lapack: bad ispec value' +pub const bad_j1 = 'lapack: j1 out of range' +pub const bad_jpvt = 'lapack: bad element of jpvt' +pub const bad_k1 = 'lapack: k1 out of range' +pub const bad_k2 = 'lapack: k2 out of range' +pub const bad_kacc22 = 'lapack: invalid value of kacc22' +pub const bad_kbot = 'lapack: kbot out of range' +pub const bad_ktop = 'lapack: ktop out of range' +pub const bad_l_work = 'lapack: insufficient declared workspace length' +pub const bad_mm = 'lapack: mm out of range' +pub const bad_n1 = 'lapack: bad value of n1' +pub const bad_n2 = 'lapack: bad value of n2' +pub const bad_na = 'lapack: bad value of na' +pub const bad_name = 'lapack: bad name' +pub const bad_nh = 'lapack: bad value of nh' +pub const bad_nw = 'lapack: bad value of nw' +pub const bad_pp = 'lapack: bad value of pp' +pub const bad_shifts = 'lapack: bad shifts' +pub const i0lt0 = 'lapack: i0 < 0' +pub const k_gtm = 'lapack: k > m' +pub const k_gtn = 'lapack: k > n' +pub const k_lt0 = 'lapack: k < 0' +pub const k_lt1 = 'lapack: k < 1' +pub const kd_lt0 = 'lapack: kd < 0' +pub const kl_lt0 = 'lapack: kl < 0' +pub const ku_lt0 = 'lapack: ku < 0' +pub const m_gtn = 'lapack: m > n' +pub const m_lt0 = 'lapack: m < 0' +pub const mm_lt0 = 'lapack: mm < 0' +pub const n0lt0 = 'lapack: n0 < 0' +pub const n_gtm = 'lapack: n > m' +pub const n_lt0 = 'lapack: n < 0' +pub const n_lt1 = 'lapack: n < 1' 
+pub const n_ltm = 'lapack: n < m' +pub const nan_c_from = 'lapack: cfrom is NaN' +pub const nan_c_to = 'lapack: cto is NaN' +pub const nb_gtm = 'lapack: nb > m' +pub const nb_gtn = 'lapack: nb > n' +pub const nb_lt0 = 'lapack: nb < 0' +pub const ncc_lt0 = 'lapack: ncc < 0' +pub const ncvt_lt0 = 'lapack: ncvt < 0' +pub const neg_a_norm = 'lapack: anorm < 0' +pub const neg_z = 'lapack: negative z value' +pub const nh_lt0 = 'lapack: nh < 0' +pub const not_isolated = 'lapack: block is not isolated' +pub const nrhs_lt0 = 'lapack: nrhs < 0' +pub const nru_lt0 = 'lapack: nru < 0' +pub const nshfts_lt0 = 'lapack: nshfts < 0' +pub const nshfts_odd = 'lapack: nshfts must be even' +pub const nv_lt0 = 'lapack: nv < 0' +pub const offset_gtm = 'lapack: offset > m' +pub const offset_lt0 = 'lapack: offset < 0' +pub const p_lt0 = 'lapack: p < 0' +pub const recur_lt0 = 'lapack: recur < 0' +pub const zero_c_from = 'lapack: zero cfrom' + +// Panic strings for bad slice lengths. +pub const bad_len_alpha = 'lapack: bad length of alpha' +pub const bad_len_beta = 'lapack: bad length of beta' +pub const bad_len_ipiv = 'lapack: bad length of ipiv' +pub const bad_len_jpiv = 'lapack: bad length of jpiv' +pub const bad_len_jpvt = 'lapack: bad length of jpvt' +pub const bad_len_k = 'lapack: bad length of k' +pub const bad_len_piv = 'lapack: bad length of piv' +pub const bad_len_selected = 'lapack: bad length of selected' +pub const bad_len_si = 'lapack: bad length of si' +pub const bad_len_sr = 'lapack: bad length of sr' +pub const bad_len_tau = 'lapack: bad length of tau' +pub const bad_len_wi = 'lapack: bad length of wi' +pub const bad_len_wr = 'lapack: bad length of wr' + +// Panic strings for insufficient slice lengths. 
+pub const short_a = 'lapack: insufficient length of a' +pub const short_ab = 'lapack: insufficient length of ab' +pub const short_auxv = 'lapack: insufficient length of auxv' +pub const short_b = 'lapack: insufficient length of b' +pub const short_c = 'lapack: insufficient length of c' +pub const short_c_norm = 'lapack: insufficient length of cnorm' +pub const short_d = 'lapack: insufficient length of d' +pub const short_dl = 'lapack: insufficient length of dl' +pub const short_du = 'lapack: insufficient length of du' +pub const short_e = 'lapack: insufficient length of e' +pub const short_f = 'lapack: insufficient length of f' +pub const short_h = 'lapack: insufficient length of h' +pub const short_i_work = 'lapack: insufficient length of iwork' +pub const short_isgn = 'lapack: insufficient length of isgn' +pub const short_q = 'lapack: insufficient length of q' +pub const short_rhs = 'lapack: insufficient length of rhs' +pub const short_s = 'lapack: insufficient length of s' +pub const short_scale = 'lapack: insufficient length of scale' +pub const short_t = 'lapack: insufficient length of t' +pub const short_tau = 'lapack: insufficient length of tau' +pub const short_tau_p = 'lapack: insufficient length of tauP' +pub const short_tau_q = 'lapack: insufficient length of tauQ' +pub const short_u = 'lapack: insufficient length of u' +pub const short_v = 'lapack: insufficient length of v' +pub const short_vl = 'lapack: insufficient length of vl' +pub const short_vr = 'lapack: insufficient length of vr' +pub const short_vt = 'lapack: insufficient length of vt' +pub const short_vn1 = 'lapack: insufficient length of vn1' +pub const short_vn2 = 'lapack: insufficient length of vn2' +pub const short_w = 'lapack: insufficient length of w' +pub const short_wh = 'lapack: insufficient length of wh' +pub const short_wv = 'lapack: insufficient length of wv' +pub const short_wi = 'lapack: insufficient length of wi' +pub const short_work = 'lapack: insufficient length of work' 
+pub const short_wr = 'lapack: insufficient length of wr' +pub const short_x = 'lapack: insufficient length of x' +pub const short_y = 'lapack: insufficient length of y' +pub const short_z = 'lapack: insufficient length of z' + +// Panic strings for bad leading dimensions of matrices. +pub const bad_ld_a = 'lapack: bad leading dimension of A' +pub const bad_ld_b = 'lapack: bad leading dimension of B' +pub const bad_ld_c = 'lapack: bad leading dimension of C' +pub const bad_ld_f = 'lapack: bad leading dimension of F' +pub const bad_ld_h = 'lapack: bad leading dimension of H' +pub const bad_ld_q = 'lapack: bad leading dimension of Q' +pub const bad_ld_t = 'lapack: bad leading dimension of T' +pub const bad_ld_u = 'lapack: bad leading dimension of U' +pub const bad_ld_v = 'lapack: bad leading dimension of V' +pub const bad_ld_vl = 'lapack: bad leading dimension of VL' +pub const bad_ld_vr = 'lapack: bad leading dimension of VR' +pub const bad_ld_vt = 'lapack: bad leading dimension of VT' +pub const bad_ld_w = 'lapack: bad leading dimension of W' +pub const bad_ld_wh = 'lapack: bad leading dimension of WH' +pub const bad_ld_wv = 'lapack: bad leading dimension of WV' +pub const bad_ld_work = 'lapack: bad leading dimension of Work' +pub const bad_ld_x = 'lapack: bad leading dimension of X' +pub const bad_ld_y = 'lapack: bad leading dimension of Y' +pub const bad_ld_z = 'lapack: bad leading dimension of Z' + +// Panic strings for bad vector increments. 
+pub const abs_inc_not_one = 'lapack: increment not one or negative one' +pub const bad_inc_x = 'lapack: incX <= 0' +pub const bad_inc_y = 'lapack: incY <= 0' +pub const zero_inc_v = 'lapack: incv == 0' diff --git a/vlas/lapack_common.v b/lapack/lapack_common.v similarity index 79% rename from vlas/lapack_common.v rename to lapack/lapack_common.v index 8a30e8f5e..14d291ee0 100644 --- a/vlas/lapack_common.v +++ b/lapack/lapack_common.v @@ -1,25 +1,26 @@ -module vlas +module lapack import vsl.errors -import vsl.vlas.internal.blas +import vsl.blas +import vsl.blas.blas64 -fn C.LAPACKE_dgesv(matrix_layout blas.MemoryLayout, n int, nrhs int, a &f64, lda int, ipiv &int, b &f64, ldb int) int +fn C.LAPACKE_dgesv(matrix_layout blas64.MemoryLayout, n int, nrhs int, a &f64, lda int, ipiv &int, b &f64, ldb int) int -fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int +fn C.LAPACKE_dgesvd(matrix_layout blas64.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int -fn C.LAPACKE_dgetrf(matrix_layout blas.MemoryLayout, m int, n int, a &f64, lda int, ipiv &int) int +fn C.LAPACKE_dgetrf(matrix_layout blas64.MemoryLayout, m int, n int, a &f64, lda int, ipiv &int) int -fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipiv &int) int +fn C.LAPACKE_dgetri(matrix_layout blas64.MemoryLayout, n int, a &f64, lda int, ipiv &int) int -fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, up u32, n int, a &f64, lda int) int +fn C.LAPACKE_dpotrf(matrix_layout blas64.MemoryLayout, up u32, n int, a &f64, lda int) int -fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int +fn C.LAPACKE_dgeev(matrix_layout blas64.MemoryLayout, calc_vl 
&char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int -fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int +fn C.LAPACKE_dsyev(matrix_layout blas64.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int -fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int +fn C.LAPACKE_dgebal(matrix_layout blas64.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int -fn C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int +fn C.LAPACKE_dgehrd(matrix_layout blas64.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int // dgesv computes the solution to a real system of linear equations. // @@ -143,7 +144,7 @@ pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { // This is the block version of the algorithm, calling Level 3 BLAS. 
pub fn dpotrf(up bool, n int, mut a []f64, lda int) { unsafe { - info := C.LAPACKE_dpotrf(.row_major, l_uplo(up), n, &a[0], lda) + info := C.LAPACKE_dpotrf(.row_major, blas.l_uplo(up), n, &a[0], lda) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } @@ -189,7 +190,7 @@ pub fn dgeev(calc_vl bool, calc_vr bool, n int, mut a []f64, lda int, wr []f64, ldvr = 1 } unsafe { - info := C.LAPACKE_dgeev(.row_major, &char(job_vlr(calc_vl).str().str), &char(job_vlr(calc_vr).str().str), + info := C.LAPACKE_dgeev(.row_major, &char(blas.job_vlr(calc_vl).str().str), &char(blas.job_vlr(calc_vr).str().str), n, &a[0], lda, &wr[0], &wi[0], &vvl, ldvl, &vvr, ldvr) if info != 0 { errors.vsl_panic('lapack failed', .efailed) diff --git a/vlas/lapack_default.c.v b/lapack/lapack_default.c.v similarity index 54% rename from vlas/lapack_default.c.v rename to lapack/lapack_default.c.v index deab5058f..a16459354 100644 --- a/vlas/lapack_default.c.v +++ b/lapack/lapack_default.c.v @@ -1,8 +1,8 @@ -module vlas +module lapack -import vsl.vlas.internal.blas +import vsl.blas.blas64 -fn C.LAPACKE_dlange(matrix_layout blas.MemoryLayout, norm &char, m int, n int, a &f64, lda int, work &f64) f64 +fn C.LAPACKE_dlange(matrix_layout blas64.MemoryLayout, norm &char, m int, n int, a &f64, lda int, work &f64) f64 pub fn dlange(norm rune, m int, n int, a []f64, lda int, work []f64) f64 { return unsafe { diff --git a/vlas/lapack_macos.c.v b/lapack/lapack_macos.c.v similarity index 94% rename from vlas/lapack_macos.c.v rename to lapack/lapack_macos.c.v index 2c0cbc7c3..b12ecacf9 100644 --- a/vlas/lapack_macos.c.v +++ b/lapack/lapack_macos.c.v @@ -1,4 +1,4 @@ -module vlas +module lapack fn C.LAPACKE_dlange(norm &char, m int, n int, a &f64, lda int, work &f64) f64 diff --git a/vlas/v.mod b/lapack/v.mod similarity index 63% rename from vlas/v.mod rename to lapack/v.mod index e036330dd..f4dca2f9d 100644 --- a/vlas/v.mod +++ b/lapack/v.mod @@ -1,6 +1,6 @@ Module { - name: 'vlas' - description: 'The V Linear 
Algebra System' + name: 'lapack' + description: 'The V Linear Algebra Package' version: '0.1.0' license: 'MIT' repo_url: 'https://github.com/vlang/vsl' diff --git a/vlas/README.md b/vlas/README.md deleted file mode 100644 index 1677eb1c8..000000000 --- a/vlas/README.md +++ /dev/null @@ -1,83 +0,0 @@ -# V Linear Algebra System - -This package implements BLAS and LAPACKE functions. It provides different backends: - -| Backend | Description | Status | Compilation Flags | -| -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------- | --------------------- | -| VLAS | Pure V implementation | WIP | `NONE` | -| OpenBLAS | OpenBLAS is an optimized BLAS library based on . Check the section [OpenBLAS Backend](#openblas-backend) for more information. | Working | `-d vsl_vlas_cblas` | -| LAPACKE | LAPACKE is a C interface to the LAPACK linear algebra routines | Working | `-d vsl_vlas_lapacke` | - -Therefore, its routines are a little more _lower level_ than the ones in the package `vsl.la`. - -## OpenBLAS Backend - -Use the flag `-d vsl_vlas_cblas` to use the OpenBLAS backend. - -### Install dependencies - -#### Debian/Ubuntu GNU Linux - -`libopenblas-dev` is not needed when using the pure V backend. - -```sh -sudo apt-get install -y --no-install-recommends \ - gcc \ - gfortran \ - libopenblas-dev -``` - -#### Arch Linux/Manjaro GNU Linux - -The best way of installing OpenBlas is using -[lapack-openblas](https://aur.archlinux.org/packages/lapack-openblas/). - -```sh -yay -S lapack-openblas -``` - -or - -```sh -git clone https://aur.archlinux.org/lapack-openblas.git /tmp/lapack-openblas -cd /tmp/lapack-openblas -makepkg -si -``` - -#### macOS - -```sh -brew install openblas -``` - -## LAPACKE Backend - -Use the flag `-d vsl_vlas_lapacke` to use the LAPACKE backend (enabled by default for now). 
- -### Install dependencies - -#### Debian/Ubuntu GNU Linux - -```sh -sudo apt-get install -y --no-install-recommends \ - gcc \ - gfortran \ - liblapacke-dev -``` - -#### Arch Linux/Manjaro GNU Linux - -The best way of installing LAPACKE is using -[lapack-openblas](https://aur.archlinux.org/packages/lapack-openblas/). - -```sh -yay -S lapack-openblas -``` - -or - -```sh -git clone https://aur.archlinux.org/lapack-openblas.git /tmp/lapack-openblas -cd /tmp/lapack-openblas -makepkg -si -``` diff --git a/vlas/oblas_d_vsl_vlas_cblas.v b/vlas/oblas_d_vsl_vlas_cblas.v deleted file mode 100644 index eb2d72b1d..000000000 --- a/vlas/oblas_d_vsl_vlas_cblas.v +++ /dev/null @@ -1,448 +0,0 @@ -module vlas - -import vsl.vlas.internal.blas - -fn C.openblas_set_num_threads(n int) - -fn C.cblas_sdsdot(n int, alpha f32, x &f32, incx int, y &f32, incy int) f32 -fn C.cblas_dsdot(n int, x &f32, incx int, y &f32, incy int) f64 -fn C.cblas_sdot(n int, x &f32, incx int, y &f32, incy int) f32 -fn C.cblas_ddot(n int, x &f64, incx int, y &f64, incy int) f64 -fn C.cblas_cdotu(n int, x voidptr, incx int, y voidptr, incy int) f32 -fn C.cblas_cdotc(n int, x voidptr, incx int, y voidptr, incy int) f32 -fn C.cblas_zdotu(n int, x voidptr, incx int, y voidptr, incy int) f64 -fn C.cblas_zdotc(n int, x voidptr, incx int, y voidptr, incy int) f64 -fn C.cblas_cdotu_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) -fn C.cblas_cdotc_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) -fn C.cblas_zdotu_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) -fn C.cblas_zdotc_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) -fn C.cblas_sasum(n int, x &f32, incx int) f32 -fn C.cblas_dasum(n int, x &f64, incx int) f64 -fn C.cblas_scasum(n int, x voidptr, incx int) f32 -fn C.cblas_dzasum(n int, x voidptr, incx int) f64 -fn C.cblas_ssum(n int, x &f32, incx int) f32 -fn C.cblas_dsum(n int, x &f64, incx int) f64 -fn C.cblas_scsum(n int, x voidptr, incx 
int) f32 -fn C.cblas_dzsum(n int, x voidptr, incx int) f64 -fn C.cblas_snrm2(n int, x &f32, incx int) f32 -fn C.cblas_dnrm2(n int, x &f64, incx int) f64 -fn C.cblas_scnrm2(n int, x voidptr, incx int) f32 -fn C.cblas_dznrm2(n int, x voidptr, incx int) f64 - -fn C.cblas_isamax(n int, x &f32, incx int) int -fn C.cblas_idamax(n int, x &f64, incx int) int -fn C.cblas_icamax(n int, x voidptr, incx int) int -fn C.cblas_izamax(n int, x voidptr, incx int) int -fn C.cblas_isamin(n int, x &f32, incx int) int -fn C.cblas_idamin(n int, x &f64, incx int) int -fn C.cblas_icamin(n int, x voidptr, incx int) int -fn C.cblas_izamin(n int, x voidptr, incx int) int -fn C.cblas_ismax(n int, x &f32, incx int) int -fn C.cblas_idmax(n int, x &f64, incx int) int -fn C.cblas_icmax(n int, x voidptr, incx int) int -fn C.cblas_izmax(n int, x voidptr, incx int) int -fn C.cblas_ismin(n int, x &f32, incx int) int -fn C.cblas_idmin(n int, x &f64, incx int) int -fn C.cblas_icmin(n int, x voidptr, incx int) int -fn C.cblas_izmin(n int, x voidptr, incx int) int -fn C.cblas_saxpy(n int, alpha f32, x &f32, incx int, y &f32, incy int) -fn C.cblas_daxpy(n int, alpha f64, x &f64, incx int, y &f64, incy int) -fn C.cblas_caxpy(n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_zaxpy(n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_scopy(n int, x &f32, incx int, y &f32, incy int) -fn C.cblas_dcopy(n int, x &f64, incx int, y &f64, incy int) -fn C.cblas_ccopy(n int, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_zcopy(n int, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_sswap(n int, x &f32, incx int, y &f32, incy int) -fn C.cblas_dswap(n int, x &f64, incx int, y &f64, incy int) -fn C.cblas_cswap(n int, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_zswap(n int, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_srot(n int, x &f32, incx int, y &f32, incy int, c f32, s f32) -fn C.cblas_drot(n int, x &f64, incx int, y &f64, incy int, c 
f64, s f64) -fn C.cblas_srotg(a &f32, b &f32, c &f32, s &f32) -fn C.cblas_drotg(a &f64, b &f64, c &f64, s &f64) -fn C.cblas_srotm(n int, x &f32, incx int, y &f32, incy int, p &f32) -fn C.cblas_drotm(n int, x &f64, incx int, y &f64, incy int, p &f64) -fn C.cblas_srotmg(d1 &f32, d2 &f32, b1 &f32, b2 f32, p &f32) -fn C.cblas_drotmg(d1 &f64, d2 &f64, b1 &f64, b2 f64, p &f64) -fn C.cblas_sscal(n int, alpha f32, x &f32, incx int) -fn C.cblas_dscal(n int, alpha f64, x &f64, incx int) -fn C.cblas_cscal(n int, alpha voidptr, x voidptr, incx int) -fn C.cblas_zscal(n int, alpha voidptr, x voidptr, incx int) -fn C.cblas_csscal(n int, alpha f32, x voidptr, incx int) -fn C.cblas_zdscal(n int, alpha f64, x voidptr, incx int) -fn C.cblas_sgemv(order blas.MemoryLayout, trans blas.Transpose, m int, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dgemv(order blas.MemoryLayout, trans blas.Transpose, m int, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_cgemv(order blas.MemoryLayout, trans blas.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zgemv(order blas.MemoryLayout, trans blas.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sger(order blas.MemoryLayout, m int, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) -fn C.cblas_dger(order blas.MemoryLayout, m int, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) -fn C.cblas_cgeru(order blas.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_cgerc(order blas.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_zgeru(order blas.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, 
lda int) -fn C.cblas_zgerc(order blas.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_strsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtrsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctrsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztrsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_strmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtrmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctrmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztrmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ssyr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x &f32, incx int, a &f32, lda int) -fn C.cblas_dsyr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x &f64, incx int, a &f64, lda int) -fn C.cblas_cher(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr, lda int) -fn C.cblas_zher(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr, lda int) -fn C.cblas_ssyr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) -fn C.cblas_dsyr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x &f64, incx int, 
y &f64, incy int, a &f64, lda int) -fn C.cblas_cher2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_zher2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_sgbmv(order blas.MemoryLayout, transA blas.Transpose, m int, n int, kl int, ku int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dgbmv(order blas.MemoryLayout, transA blas.Transpose, m int, n int, kl int, ku int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_cgbmv(order blas.MemoryLayout, transA blas.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zgbmv(order blas.MemoryLayout, transA blas.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_ssbmv(order blas.MemoryLayout, uplo blas.Uplo, n int, k int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dsbmv(order blas.MemoryLayout, uplo blas.Uplo, n int, k int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_stbmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtbmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctbmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztbmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_stbsv(order blas.MemoryLayout, uplo 
blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtbsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctbsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztbsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_stpmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap &f32, x &f32, incx int) -fn C.cblas_dtpmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap &f64, x &f64, incx int) -fn C.cblas_ctpmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ztpmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_stpsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap &f32, x &f32, incx int) -fn C.cblas_dtpsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap &f64, x &f64, incx int) -fn C.cblas_ctpsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ztpsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ssymv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dsymv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_chemv(order 
blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhemv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sspmv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, ap &f32, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dspmv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, ap &f64, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_sspr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x &f32, incx int, ap &f32) -fn C.cblas_dspr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x &f64, incx int, ap &f64) -fn C.cblas_chpr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr) -fn C.cblas_zhpr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr) -fn C.cblas_sspr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32) -fn C.cblas_dspr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64) -fn C.cblas_chpr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) -fn C.cblas_zhpr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) -fn C.cblas_chbmv(order blas.MemoryLayout, uplo blas.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhbmv(order blas.MemoryLayout, uplo blas.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_chpmv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhpmv(order 
blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sgemm(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) -fn C.cblas_dgemm(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_cgemm(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_cgemm3m(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zgemm(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zgemm3m(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssymm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) -fn C.cblas_dsymm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_csymm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsymm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssyrk(order 
blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f32, a &f32, lda int, beta f32, c &f32, ldc int) -fn C.cblas_dsyrk(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f64, a &f64, lda int, beta f64, c &f64, ldc int) -fn C.cblas_csyrk(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsyrk(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssyr2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) -fn C.cblas_dsyr2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_csyr2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsyr2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_strmm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) -fn C.cblas_dtrmm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) -fn C.cblas_ctrmm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_ztrmm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha voidptr, a 
voidptr, lda int, B voidptr, ldb int) -fn C.cblas_strsm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) -fn C.cblas_dtrsm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) -fn C.cblas_ctrsm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_ztrsm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_chemm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zhemm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_cherk(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f32, a voidptr, lda int, beta f32, c voidptr, ldc int) -fn C.cblas_zherk(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f64, a voidptr, lda int, beta f64, c voidptr, ldc int) -fn C.cblas_cher2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f32, c voidptr, ldc int) -fn C.cblas_zher2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f64, c voidptr, ldc int) -fn C.cblas_xerbla(p int, rout &byte, form &byte, other voidptr) - -fn C.cblas_saxpby(n int, alpha f32, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_daxpby(n int, alpha f64, x &f64, incx 
int, beta f64, y &f64, incy int) -fn C.cblas_caxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zaxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_somatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, b &f32, cldb int) -fn C.cblas_domatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha f64, a &f64, clda int, b &f64, cldb int) -fn C.cblas_comatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, b &f32, cldb int) -fn C.cblas_zomatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, b &f64, cldb int) -fn C.cblas_simatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, cldb int) -fn C.cblas_dimatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha f64, a &f64, clda int, cldb int) -fn C.cblas_cimatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, cldb int) -fn C.cblas_zimatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, cldb int) -fn C.cblas_sgeadd(corder blas.MemoryLayout, crows int, ccols int, calpha f32, a &f32, clda int, cbeta f32, c &f32, cldc int) -fn C.cblas_dgeadd(corder blas.MemoryLayout, crows int, ccols int, calpha f64, a &f64, clda int, cbeta f64, c &f64, cldc int) -fn C.cblas_cgeadd(corder blas.MemoryLayout, crows int, ccols int, calpha &f32, a &f32, clda int, cbeta &f32, c &f32, cldc int) -fn C.cblas_zgeadd(corder blas.MemoryLayout, crows int, ccols int, calpha &f64, a &f64, clda int, cbeta &f64, c &f64, cldc int) - -// set_num_threads sets the number of threads in OpenBLAS -pub fn set_num_threads(n int) { - C.openblas_set_num_threads(n) -} - -@[inline] -pub fn sdsdot(n int, 
alpha f32, x []f32, incx int, y []f32, incy int) f32 { - return C.cblas_sdsdot(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn dsdot(n int, x []f32, incx int, y []f32, incy int) f64 { - return C.cblas_dsdot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn sdot(n int, x []f32, incx int, y []f32, incy int) f32 { - return C.cblas_sdot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn ddot(n int, x []f64, incx int, y []f64, incy int) f64 { - return C.cblas_ddot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn sasum(n int, x []f32, incx int) f32 { - return C.cblas_sasum(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dasum(n int, x []f64, incx int) f64 { - return C.cblas_dasum(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn ssum(n int, x []f32, incx int) f32 { - return C.cblas_ssum(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dsum(n int, x []f64, incx int) f64 { - return C.cblas_dsum(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn snrm2(n int, x []f32, incx int) f32 { - return C.cblas_snrm2(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dnrm2(n int, x []f64, incx int) f64 { - return C.cblas_dnrm2(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn isamax(n int, x []f32, incx int) int { - return C.cblas_isamax(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn idamax(n int, x []f64, incx int) int { - return C.cblas_idamax(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn isamin(n int, x []f32, incx int) int { - return C.cblas_isamin(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn idamin(n int, x &f64, incx int) int { - return C.cblas_idamin(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn ismax(n int, x []f32, incx int) int { - return C.cblas_ismax(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn idmax(n int, x []f64, incx int) int { - return C.cblas_idmax(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn ismin(n int, x []f32, incx 
int) int { - return C.cblas_ismin(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn idmin(n int, x []f64, incx int) int { - return C.cblas_idmin(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn saxpy(n int, alpha f32, x []f32, incx int, mut y []f32, incy int) { - C.cblas_saxpy(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn daxpy(n int, alpha f64, x []f64, incx int, mut y []f64, incy int) { - C.cblas_daxpy(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn scopy(n int, mut x []f32, incx int, mut y []f32, incy int) { - C.cblas_scopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn dcopy(n int, mut x []f64, incx int, mut y []f64, incy int) { - C.cblas_dcopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn sswap(n int, mut x []f32, incx int, mut y []f32, incy int) { - C.cblas_sswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn dswap(n int, mut x []f64, incx int, mut y []f64, incy int) { - C.cblas_dswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn srot(n int, mut x []f32, incx int, mut y []f32, incy int, c f32, s f32) { - C.cblas_srot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, c, s) -} - -@[inline] -pub fn drot(n int, mut x []f64, incx int, mut y []f64, incy int, c f64, s f64) { - C.cblas_drot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, c, s) -} - -@[inline] -pub fn srotg(a f32, b f32, c f32, s f32) { - C.cblas_srotg(&a, &b, &c, &s) -} - -@[inline] -pub fn drotg(a f64, b f64, c f64, s f64) { - C.cblas_drotg(&a, &b, &c, &s) -} - -@[inline] -pub fn srotm(n int, x []f32, incx int, y []f32, incy int, p []f32) { - C.cblas_srotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) -} - -@[inline] -pub fn drotm(n int, x []f64, incx int, y []f64, incy int, p []f64) { - C.cblas_drotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) -} - -@[inline] -pub 
fn srotmg(d1 f32, d2 f32, b1 f32, b2 f32, p []f32) { - C.cblas_srotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) -} - -@[inline] -pub fn drotmg(d1 f64, d2 f64, b1 f64, b2 f32, p []f64) { - C.cblas_drotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) -} - -@[inline] -pub fn sscal(n int, alpha f32, mut x []f32, incx int) { - C.cblas_sscal(n, alpha, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dscal(n int, alpha f64, mut x []f64, incx int) { - C.cblas_dscal(n, alpha, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn sgemv(trans bool, m int, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_sgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, - incx, beta, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, - incx, beta, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn sger(m int, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { - C.cblas_sger(.row_major, m, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, - unsafe { &a[0] }, lda) -} - -@[inline] -pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - C.cblas_dger(.row_major, m, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, - unsafe { &a[0] }, lda) -} - -@[inline] -pub fn strsv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn 
strmv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn ssyr(uplo bool, n int, alpha f32, x []f32, incx int, mut a []f32, lda int) { - C.cblas_ssyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, - lda) -} - -@[inline] -pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { - C.cblas_dsyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, - lda) -} - -@[inline] -pub fn ssyr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { - C.cblas_ssyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, - incy, unsafe { &a[0] }, lda) -} - -@[inline] -pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - C.cblas_dsyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, - incy, unsafe { &a[0] }, lda) -} - -@[inline] -pub fn sgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut cc []f32, ldc int) { - C.cblas_sgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) -} - -@[inline] -pub fn dgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { - C.cblas_dgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) -} 
From d667c95c858bd01bfec2def6f2311045c205e1fe Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 02:15:22 -0300 Subject: [PATCH 02/33] Update installation instructions for LAPACK-OpenBLAS --- blas/README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/blas/README.md b/blas/README.md index ac8c441d8..a21bdf992 100644 --- a/blas/README.md +++ b/blas/README.md @@ -57,10 +57,4 @@ cd /tmp/lapack-openblas makepkg -si ``` -### macOS - -```sh -brew install openblas -``` - From 10eea19687af64e1c8fb7f5d6eaf6cc4fafe9304 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 02:37:19 -0300 Subject: [PATCH 03/33] Update imports and function signatures in lapack and blas modules --- blas/blas64/conversions.v | 5 + blas/conversions.v | 23 +++ blas/oblas_d_vsl_blas_cblas.v | 230 +++++++++++++++---------------- blas/oblas_notd_vsl_blas_cblas.v | 4 +- lapack/lapack_common.v | 19 ++- lapack/lapack_default.c.v | 4 +- 6 files changed, 153 insertions(+), 132 deletions(-) diff --git a/blas/blas64/conversions.v b/blas/blas64/conversions.v index 591e7a2e8..6688d5887 100644 --- a/blas/blas64/conversions.v +++ b/blas/blas64/conversions.v @@ -1,10 +1,12 @@ module blas64 +// MemoryLayout is used to specify the memory layout of a matrix. pub enum MemoryLayout { row_major = 101 col_major = 102 } +// Transpose is used to specify the transposition of a matrix. pub enum Transpose { no_trans = 111 trans = 112 @@ -12,16 +14,19 @@ pub enum Transpose { conj_no_trans = 114 } +// Uplo is used to specify whether the upper or lower triangle of a matrix is referenced. pub enum Uplo { upper = 121 lower = 122 } +// Diagonal is used to specify whether the diagonal of a matrix is unit or non-unit. pub enum Diagonal { non_unit = 131 unit = 132 } +// Side is used to specify whether a matrix is on the left or right side in a matrix-matrix multiplication. 
pub enum Side { left = 141 right = 142 diff --git a/blas/conversions.v b/blas/conversions.v index 154edade5..e917e4f58 100644 --- a/blas/conversions.v +++ b/blas/conversions.v @@ -6,18 +6,37 @@ import math.complex import vsl.errors import vsl.blas.blas64 -pub fn c_trans(trans bool) blas64.Transpose { +// MemoryLayout is used to specify the memory layout of a matrix. +pub type MemoryLayout = blas64.MemoryLayout + +// Transpose is used to specify the transposition of a matrix. +pub type Transpose = blas64.Transpose + +// Uplo is used to specify whether the upper or lower triangle of a matrix is referenced. +pub type Uplo = blas64.Uplo + +// Diagonal is used to specify whether the diagonal of a matrix is unit or non-unit. +pub type Diagonal = blas64.Diagonal + +// Side is used to specify whether a matrix is on the left or right side in a matrix-matrix multiplication. +pub type Side = blas64.Side + +// c_trans is a helper function to convert bool to Transpose +pub fn c_trans(trans bool) Transpose { return if trans { .trans } else { .no_trans } } -pub fn c_uplo(up bool) blas64.Uplo { +// c_uplo is a helper function to convert bool to Uplo +pub fn c_uplo(up bool) Uplo { return if up { .upper } else { .lower } } +// l_uplo is a helper function to convert bool to the LAPACK uplo character code `U` or `L` (as u8) fn l_uplo(up bool) u8 { return if up { `U` } else { `L` } } +// job_vlr is a helper function to convert bool to the LAPACK job rune `V` or `N` fn job_vlr(do_calc bool) rune { return if do_calc { `V` } else { `N` } } diff --git a/blas/oblas_d_vsl_blas_cblas.v b/blas/oblas_d_vsl_blas_cblas.v index b4483c995..29be3be06 100644 --- a/blas/oblas_d_vsl_blas_cblas.v +++ b/blas/oblas_d_vsl_blas_cblas.v @@ -1,7 +1,5 @@ module blas -import vsl.blas64.blas64 - fn C.openblas_set_num_threads(n int) fn C.cblas_sdsdot(n int, alpha f32, x &f32, incx int, y &f32, incy int) f32 @@ -71,122 +69,122 @@ fn C.cblas_cscal(n int, alpha voidptr, x voidptr, incx int) fn C.cblas_zscal(n int, alpha voidptr, x voidptr, incx int) fn C.cblas_csscal(n int, alpha f32, x voidptr, 
incx int) fn C.cblas_zdscal(n int, alpha f64, x voidptr, incx int) -fn C.cblas_sgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_cgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sger(order blas64.MemoryLayout, m int, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) -fn C.cblas_dger(order blas64.MemoryLayout, m int, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) -fn C.cblas_cgeru(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_cgerc(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_zgeru(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_zgerc(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_strsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn 
C.cblas_ztrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_strmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ssyr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, a &f32, lda int) -fn C.cblas_dsyr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, a &f64, lda int) -fn C.cblas_cher(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr, lda int) -fn C.cblas_zher(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr, lda int) -fn C.cblas_ssyr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) -fn C.cblas_dsyr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) -fn C.cblas_cher2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_zher2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_sgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn 
C.cblas_dgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_cgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_ssbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dsbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_stbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_stbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn 
C.cblas_ztbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_stpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f32, x &f32, incx int) -fn C.cblas_dtpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f64, x &f64, incx int) -fn C.cblas_ctpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ztpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_stpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f32, x &f32, incx int) -fn C.cblas_dtpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f64, x &f64, incx int) -fn C.cblas_ctpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ztpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ssymv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dsymv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_chemv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhemv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sspmv(order blas64.MemoryLayout, uplo 
blas64.Uplo, n int, alpha f32, ap &f32, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dspmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, ap &f64, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_sspr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, ap &f32) -fn C.cblas_dspr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, ap &f64) -fn C.cblas_chpr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr) -fn C.cblas_zhpr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr) -fn C.cblas_sspr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32) -fn C.cblas_dspr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64) -fn C.cblas_chpr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) -fn C.cblas_zhpr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) -fn C.cblas_chbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_chpmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhpmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta 
f32, c &f32, ldc int) -fn C.cblas_dgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_cgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_cgemm3m(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zgemm3m(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) -fn C.cblas_dsymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_csymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a &f32, lda int, beta f32, c &f32, ldc int) -fn C.cblas_dsyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha 
f64, a &f64, lda int, beta f64, c &f64, ldc int) -fn C.cblas_csyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) -fn C.cblas_dsyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_csyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_strmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) -fn C.cblas_dtrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) -fn C.cblas_ctrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_ztrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_strsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag 
blas64.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) -fn C.cblas_dtrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) -fn C.cblas_ctrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_ztrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_chemm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zhemm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_cherk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a voidptr, lda int, beta f32, c voidptr, ldc int) -fn C.cblas_zherk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f64, a voidptr, lda int, beta f64, c voidptr, ldc int) -fn C.cblas_cher2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f32, c voidptr, ldc int) -fn C.cblas_zher2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f64, c voidptr, ldc int) +fn C.cblas_sgemv(order MemoryLayout, trans Transpose, m int, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dgemv(order MemoryLayout, trans Transpose, m int, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, 
incy int) +fn C.cblas_cgemv(order MemoryLayout, trans Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zgemv(order MemoryLayout, trans Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sger(order MemoryLayout, m int, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) +fn C.cblas_dger(order MemoryLayout, m int, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) +fn C.cblas_cgeru(order MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_cgerc(order MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zgeru(order MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zgerc(order MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_strsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtrsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctrsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztrsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_strmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtrmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctrmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztrmv(order MemoryLayout, 
uplo Uplo, transA Transpose, diag Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ssyr(order MemoryLayout, uplo Uplo, n int, alpha f32, x &f32, incx int, a &f32, lda int) +fn C.cblas_dsyr(order MemoryLayout, uplo Uplo, n int, alpha f64, x &f64, incx int, a &f64, lda int) +fn C.cblas_cher(order MemoryLayout, uplo Uplo, n int, alpha f32, x voidptr, incx int, a voidptr, lda int) +fn C.cblas_zher(order MemoryLayout, uplo Uplo, n int, alpha f64, x voidptr, incx int, a voidptr, lda int) +fn C.cblas_ssyr2(order MemoryLayout, uplo Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) +fn C.cblas_dsyr2(order MemoryLayout, uplo Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) +fn C.cblas_cher2(order MemoryLayout, uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zher2(order MemoryLayout, uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_sgbmv(order MemoryLayout, transA Transpose, m int, n int, kl int, ku int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dgbmv(order MemoryLayout, transA Transpose, m int, n int, kl int, ku int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_cgbmv(order MemoryLayout, transA Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zgbmv(order MemoryLayout, transA Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_ssbmv(order MemoryLayout, uplo Uplo, n int, k int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dsbmv(order MemoryLayout, uplo Uplo, n int, k int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_stbmv(order MemoryLayout, 
uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtbmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctbmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztbmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_stbsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtbsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctbsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztbsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_stpmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap &f32, x &f32, incx int) +fn C.cblas_dtpmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap &f64, x &f64, incx int) +fn C.cblas_ctpmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_ztpmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_stpsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap &f32, x &f32, incx int) +fn C.cblas_dtpsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap &f64, x &f64, incx int) +fn C.cblas_ctpsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_ztpsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap voidptr, x voidptr, incx int) +fn 
C.cblas_ssymv(order MemoryLayout, uplo Uplo, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dsymv(order MemoryLayout, uplo Uplo, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_chemv(order MemoryLayout, uplo Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhemv(order MemoryLayout, uplo Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sspmv(order MemoryLayout, uplo Uplo, n int, alpha f32, ap &f32, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dspmv(order MemoryLayout, uplo Uplo, n int, alpha f64, ap &f64, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_sspr(order MemoryLayout, uplo Uplo, n int, alpha f32, x &f32, incx int, ap &f32) +fn C.cblas_dspr(order MemoryLayout, uplo Uplo, n int, alpha f64, x &f64, incx int, ap &f64) +fn C.cblas_chpr(order MemoryLayout, uplo Uplo, n int, alpha f32, x voidptr, incx int, a voidptr) +fn C.cblas_zhpr(order MemoryLayout, uplo Uplo, n int, alpha f64, x voidptr, incx int, a voidptr) +fn C.cblas_sspr2(order MemoryLayout, uplo Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32) +fn C.cblas_dspr2(order MemoryLayout, uplo Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64) +fn C.cblas_chpr2(order MemoryLayout, uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) +fn C.cblas_zhpr2(order MemoryLayout, uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) +fn C.cblas_chbmv(order MemoryLayout, uplo Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhbmv(order MemoryLayout, uplo Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_chpmv(order MemoryLayout, uplo 
Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhpmv(order MemoryLayout, uplo Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sgemm(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dgemm(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn C.cblas_cgemm(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_cgemm3m(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zgemm(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zgemm3m(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssymm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dsymm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn C.cblas_csymm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zsymm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssyrk(order MemoryLayout, uplo Uplo, trans Transpose, n int, 
k int, alpha f32, a &f32, lda int, beta f32, c &f32, ldc int) +fn C.cblas_dsyrk(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha f64, a &f64, lda int, beta f64, c &f64, ldc int) +fn C.cblas_csyrk(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zsyrk(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssyr2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dsyr2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn C.cblas_csyr2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zsyr2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_strmm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) +fn C.cblas_dtrmm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) +fn C.cblas_ctrmm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_ztrmm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_strsm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) +fn C.cblas_dtrsm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, 
diag Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) +fn C.cblas_ctrsm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_ztrsm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_chemm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zhemm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_cherk(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha f32, a voidptr, lda int, beta f32, c voidptr, ldc int) +fn C.cblas_zherk(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha f64, a voidptr, lda int, beta f64, c voidptr, ldc int) +fn C.cblas_cher2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f32, c voidptr, ldc int) +fn C.cblas_zher2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f64, c voidptr, ldc int) fn C.cblas_xerbla(p int, rout &byte, form &byte, other voidptr) fn C.cblas_saxpby(n int, alpha f32, x &f32, incx int, beta f32, y &f32, incy int) fn C.cblas_daxpby(n int, alpha f64, x &f64, incx int, beta f64, y &f64, incy int) fn C.cblas_caxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) fn C.cblas_zaxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_somatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, b &f32, cldb int) -fn C.cblas_domatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, 
calpha f64, a &f64, clda int, b &f64, cldb int) -fn C.cblas_comatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, b &f32, cldb int) -fn C.cblas_zomatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, b &f64, cldb int) -fn C.cblas_simatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, cldb int) -fn C.cblas_dimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f64, a &f64, clda int, cldb int) -fn C.cblas_cimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, cldb int) -fn C.cblas_zimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, cldb int) -fn C.cblas_sgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha f32, a &f32, clda int, cbeta f32, c &f32, cldc int) -fn C.cblas_dgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha f64, a &f64, clda int, cbeta f64, c &f64, cldc int) -fn C.cblas_cgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha &f32, a &f32, clda int, cbeta &f32, c &f32, cldc int) -fn C.cblas_zgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha &f64, a &f64, clda int, cbeta &f64, c &f64, cldc int) +fn C.cblas_somatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha f32, a &f32, clda int, b &f32, cldb int) +fn C.cblas_domatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha f64, a &f64, clda int, b &f64, cldb int) +fn C.cblas_comatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, b &f32, cldb int) +fn C.cblas_zomatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, b &f64, cldb int) +fn C.cblas_simatcopy(corder MemoryLayout, ctrans Transpose, crows 
int, ccols int, calpha f32, a &f32, clda int, cldb int) +fn C.cblas_dimatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha f64, a &f64, clda int, cldb int) +fn C.cblas_cimatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, cldb int) +fn C.cblas_zimatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, cldb int) +fn C.cblas_sgeadd(corder MemoryLayout, crows int, ccols int, calpha f32, a &f32, clda int, cbeta f32, c &f32, cldc int) +fn C.cblas_dgeadd(corder MemoryLayout, crows int, ccols int, calpha f64, a &f64, clda int, cbeta f64, c &f64, cldc int) +fn C.cblas_cgeadd(corder MemoryLayout, crows int, ccols int, calpha &f32, a &f32, clda int, cbeta &f32, c &f32, cldc int) +fn C.cblas_zgeadd(corder MemoryLayout, crows int, ccols int, calpha &f64, a &f64, clda int, cbeta &f64, c &f64, cldc int) // set_num_threads sets the number of threads in OpenBLAS pub fn set_num_threads(n int) { @@ -388,25 +386,25 @@ pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a } @[inline] -pub fn strsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { +pub fn strsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { +pub fn dtrsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn strmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { +pub fn strmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f32, lda int, mut x 
[]f32, incx int) { C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { +pub fn dtrmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } diff --git a/blas/oblas_notd_vsl_blas_cblas.v b/blas/oblas_notd_vsl_blas_cblas.v index 763995224..8552dde50 100644 --- a/blas/oblas_notd_vsl_blas_cblas.v +++ b/blas/oblas_notd_vsl_blas_cblas.v @@ -57,12 +57,12 @@ pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a } @[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { +pub fn dtrsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { blas64.dtrsv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) } @[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { +pub fn dtrmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { blas64.dtrmv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) } diff --git a/lapack/lapack_common.v b/lapack/lapack_common.v index 14d291ee0..0c4064c2b 100644 --- a/lapack/lapack_common.v +++ b/lapack/lapack_common.v @@ -2,25 +2,24 @@ module lapack import vsl.errors import vsl.blas -import vsl.blas.blas64 -fn C.LAPACKE_dgesv(matrix_layout blas64.MemoryLayout, n int, nrhs int, a &f64, lda int, ipiv &int, b &f64, ldb int) int +fn C.LAPACKE_dgesv(matrix_layout blas.MemoryLayout, n int, nrhs int, a &f64, lda int, ipiv &int, b &f64, ldb int) int -fn C.LAPACKE_dgesvd(matrix_layout blas64.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt 
&f64, ldvt int, superb &f64) int +fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int -fn C.LAPACKE_dgetrf(matrix_layout blas64.MemoryLayout, m int, n int, a &f64, lda int, ipiv &int) int +fn C.LAPACKE_dgetrf(matrix_layout blas.MemoryLayout, m int, n int, a &f64, lda int, ipiv &int) int -fn C.LAPACKE_dgetri(matrix_layout blas64.MemoryLayout, n int, a &f64, lda int, ipiv &int) int +fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipiv &int) int -fn C.LAPACKE_dpotrf(matrix_layout blas64.MemoryLayout, up u32, n int, a &f64, lda int) int +fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, up u32, n int, a &f64, lda int) int -fn C.LAPACKE_dgeev(matrix_layout blas64.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int +fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int -fn C.LAPACKE_dsyev(matrix_layout blas64.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int +fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int -fn C.LAPACKE_dgebal(matrix_layout blas64.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int +fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int -fn C.LAPACKE_dgehrd(matrix_layout blas64.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int +fn C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int // dgesv computes the solution to a real system of linear equations. 
// diff --git a/lapack/lapack_default.c.v b/lapack/lapack_default.c.v index a16459354..783b54980 100644 --- a/lapack/lapack_default.c.v +++ b/lapack/lapack_default.c.v @@ -1,8 +1,8 @@ module lapack -import vsl.blas.blas64 +import vsl.blas -fn C.LAPACKE_dlange(matrix_layout blas64.MemoryLayout, norm &char, m int, n int, a &f64, lda int, work &f64) f64 +fn C.LAPACKE_dlange(matrix_layout blas.MemoryLayout, norm &char, m int, n int, a &f64, lda int, work &f64) f64 pub fn dlange(norm rune, m int, n int, a []f64, lda int, work []f64) f64 { return unsafe { From 5bd805c1edd4a9306dca531d9bb0bff68218b62f Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 03:00:06 -0300 Subject: [PATCH 04/33] Update import statements in lapack module --- lapack/lapack64/dgesv.v | 4 +-- lapack/lapack64/dgetrf.v | 2 +- lapack/lapack64/dgetrs.v | 67 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 lapack/lapack64/dgetrs.v diff --git a/lapack/lapack64/dgesv.v b/lapack/lapack64/dgesv.v index dc8b61bdf..03bbb93d3 100644 --- a/lapack/lapack64/dgesv.v +++ b/lapack/lapack64/dgesv.v @@ -1,7 +1,7 @@ module lapack import math -import vsl.blas.blas64 +import vsl.blas // dgesv computes the solution to a real system of linear equations // @@ -32,7 +32,7 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb if lda < math.max(1, n) { panic(bad_ld_a) } - if ldb < math.max(1, n) { + if ldb < math.max(1, nrhs) { panic(bad_ld_b) } diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 54e383eac..370f7a5e1 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -1,7 +1,7 @@ module lapack import math -import vsl.blas.blas64 +import vsl.blas // dgetrf computes the LU decomposition of an m×n matrix A using partial // pivoting with row interchanges. 
diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v
new file mode 100644
index 000000000..e26b661c8
--- /dev/null
+++ b/lapack/lapack64/dgetrs.v
@@ -0,0 +1,68 @@
+module lapack
+
+import math
+import vsl.blas
+
+// dgetrs solves a system of equations using an LU factorization.
+// The system of equations solved is
+//
+//	A * X = B   if trans == blas.NoTrans
+//	Aᵀ * X = B  if trans == blas.Trans
+//
+// A is a general n×n matrix with stride lda. B is a general matrix of size n×nrhs.
+//
+// On entry b contains the elements of the matrix B. On exit, b contains the
+// elements of X, the solution to the system of equations.
+//
+// a and ipiv contain the LU factorization of A and the permutation indices as
+// computed by Dgetrf. ipiv is zero-indexed.
+pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) {
+	if trans != .no_trans && trans != .trans && trans != .conj_trans {
+		panic(bad_trans)
+	}
+	if n < 0 {
+		panic(n_lt0)
+	}
+	if nrhs < 0 {
+		panic(nrhs_lt0)
+	}
+	if lda < math.max(1, n) {
+		panic(bad_ld_a)
+	}
+	if ldb < math.max(1, nrhs) {
+		panic(bad_ld_b)
+	}
+
+	// Quick return if possible.
+	if n == 0 || nrhs == 0 {
+		return
+	}
+
+	if a.len < (n - 1) * lda + n {
+		panic(short_ab)
+	}
+	if b.len < (n - 1) * ldb + nrhs {
+		panic(short_b)
+	}
+	if ipiv.len != n {
+		panic(bad_len_ipiv)
+	}
+
+	if trans == .no_trans {
+		// Solve A * X = B.
+		dlaswp(nrhs, mut b, ldb, 0, n - 1, ipiv, 1)
+		// Solve L * X = B, overwriting B with X.
+		blas.dtrsm(.left, .lower, .no_trans, .unit, n, nrhs, 1, mut a, lda, mut b, ldb)
+		// Solve U * X = B, overwriting B with X.
+		blas.dtrsm(.left, .upper, .no_trans, .non_unit, n, nrhs, 1, mut a, lda, mut b,
+			ldb)
+		return
+	}
+
+	// Solve Aᵀ * X = B.
+	// Solve Uᵀ * X = B, overwriting B with X.
+	blas.dtrsm(.left, .upper, .trans, .non_unit, n, nrhs, 1, mut a, lda, mut b, ldb)
+	// Solve Lᵀ * X = B, overwriting B with X.
+ blas.dtrsm(.left, .lower, .trans, .unit, n, nrhs, 1, mut a, lda, mut b, ldb) + dlaswp(nrhs, b, ldb, 0, n - 1, ipiv, -1) +} From f33a31b91b99c8a8fee9c8ec73ae81bfddcab57b Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 03:00:59 -0300 Subject: [PATCH 05/33] Update LAPACKE backend status in README.md --- lapack/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lapack/README.md b/lapack/README.md index fccedfdfd..578ae936f 100644 --- a/lapack/README.md +++ b/lapack/README.md @@ -2,10 +2,10 @@ This package implements Linear Algebra routines in V. -| Backend | Description | Status | Compilation Flags | -| -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ | ------------------- | -| BLAS | Pure V implementation | Stable | `NONE` | -| LAPACKE | LAPACKE is a C interface to LAPACK. It is a standard part of the LAPACK distribution. Check the section [LAPACKE Backend](#lapacke-backend) for more information. | Stable | `-d vsl_lapack_lapacke` | +| Backend | Description | Status | Compilation Flags | +| ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ | ----------------------- | +| BLAS | Pure V implementation | WIP | `NONE` | +| LAPACKE | LAPACKE is a C interface to LAPACK. It is a standard part of the LAPACK distribution. Check the section [LAPACKE Backend](#lapacke-backend) for more information. | Stable | `-d vsl_lapack_lapacke` | Therefore, its routines are a little more _lower level_ than the ones in the package `vsl.la`. 
From 3a0af32cf74825daa7ea51636cfc99cf9334b1a8 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 03:01:28 -0300 Subject: [PATCH 06/33] Refactor LAPACKE backend for improved performance --- lapack/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lapack/README.md b/lapack/README.md index 578ae936f..bbd4c7a82 100644 --- a/lapack/README.md +++ b/lapack/README.md @@ -11,7 +11,8 @@ Therefore, its routines are a little more _lower level_ than the ones in the pac ## LAPACKE Backend -We provide a backend for the LAPACKE library. This backend is probably the fastest one for all platforms +We provide a backend for the LAPACKE library. This backend is probably +the fastest one for all platforms but it requires the installation of the LAPACKE library. Use the compilation flag `-d vsl_lapack_lapacke` to use the LAPACKE backend From 11ed0751cac53c8a5abf9344de5511cf42986c81 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 03:02:41 -0300 Subject: [PATCH 07/33] Refactor README files for BLAS and LAPACK backends --- blas/README.md | 3 ++- lapack/README.md | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/blas/README.md b/blas/README.md index a21bdf992..1413acca3 100644 --- a/blas/README.md +++ b/blas/README.md @@ -11,7 +11,8 @@ Therefore, its routines are a little more _lower level_ than the ones in the pac ## OpenBLAS Backend -We provide a backend for the OpenBLAS library. This backend is probably the fastest one for all platforms +We provide a backend for the OpenBLAS library. This backend is probably +the fastest one for all platforms but it requires the installation of the OpenBLAS library. Use the compilation flag `-d vsl_blas_cblas` to use the OpenBLAS backend diff --git a/lapack/README.md b/lapack/README.md index bbd4c7a82..2ee59d40f 100644 --- a/lapack/README.md +++ b/lapack/README.md @@ -2,10 +2,11 @@ This package implements Linear Algebra routines in V. 
-| Backend | Description | Status | Compilation Flags |
-| ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ | ----------------------- |
-| BLAS | Pure V implementation | WIP | `NONE` |
-| LAPACKE | LAPACKE is a C interface to LAPACK. It is a standard part of the LAPACK distribution. Check the section [LAPACKE Backend](#lapacke-backend) for more information. | Stable | `-d vsl_lapack_lapacke` |
+| Backend | Description | Status | Compilation Flags |
+| ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ | ----------------------- |
+| BLAS | Pure V implementation | WIP | `NONE` |
+| LAPACKE | LAPACKE is a C interface to LAPACK. It is a standard part of the LAPACK distribution. Check the section [LAPACKE Backend](#lapacke-backend) for more information. | Stable | `-d vsl_lapack_lapacke` |
+
 
 Therefore, its routines are a little more _lower level_ than the ones in the package `vsl.la`.
From c3d1e2ef5110178f3550b5507af7c087636d8ff9 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 03:06:41 -0300 Subject: [PATCH 08/33] Refactor conversion functions to be public --- blas/conversions.v | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blas/conversions.v b/blas/conversions.v index e917e4f58..553bcf16d 100644 --- a/blas/conversions.v +++ b/blas/conversions.v @@ -32,12 +32,12 @@ pub fn c_uplo(up bool) Uplo { } // l_uplo is a helper function to convert bool to Uplo -fn l_uplo(up bool) u8 { +pub fn l_uplo(up bool) u8 { return if up { `U` } else { `L` } } // job_vlr is a helper function to convert bool to char -fn job_vlr(do_calc bool) rune { +pub fn job_vlr(do_calc bool) rune { return if do_calc { `V` } else { `N` } } From 0832f6d71d9c3afe64e40cc49b62e80904fabe6f Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 24 Mar 2024 02:06:31 -0300 Subject: [PATCH 09/33] Refactor dgetrf function to use blocked algorithm --- lapack/lapack64/dgetrf.v | 28 ++++ lapack/lapack64/ilaenv.v | 272 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 300 insertions(+) create mode 100644 lapack/lapack64/ilaenv.v diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 370f7a5e1..9a1a10a25 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -48,4 +48,32 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { if ipiv.len < mn { panic(bad_len_ipiv) } + + nb := ilaenv(1, 'DGETRF', ' ', m, n, -1, -1) + + if nb <= 1 || nb >= mn { + // use the unblocked algorithm. + return dgetf2(m, n, mut a, lda, ipiv) + } + + for j := 0; j < mn; j += nb { + jb := math.min(mn - j, nb) + + // factor diagonal and subdiagonal blocks and test for exact singularity. + dgetf2(m - j, jb, mut a[j * lda + j..], lda, ipiv[j..j + jb]) + + for i := j; i <= math.min(m - 1, j + jb - 1); i++ { + ipiv[i] += j + } + + // apply interchanges to columns 1..j-1. 
+		dlaswp(j, mut a, lda, j, j + jb - 1, ipiv[..j + jb], 1)
+
+		if j + jb < n {
+			// apply interchanges to columns j+jb..n-1 (trailing submatrix).
+			mut slice := unsafe { a[j + jb..] }
+			dlaswp(n - j - jb, mut slice, lda, j, j + jb - 1, ipiv[..j + jb], 1)
+			// TODO: blocked update incomplete — missing dtrsm (block row of U) and dgemm (trailing submatrix update).
+		}
+	}
 }
diff --git a/lapack/lapack64/ilaenv.v b/lapack/lapack64/ilaenv.v
new file mode 100644
index 000000000..cd075a0b8
--- /dev/null
+++ b/lapack/lapack64/ilaenv.v
@@ -0,0 +1,272 @@
+module lapack64
+
+// ilaenv returns algorithm tuning parameters for the algorithm given by the
+// input string. ispec specifies the parameter to return:
+//
+//	1: The optimal block size for a blocked algorithm.
+//	2: The minimum block size for a blocked algorithm.
+//	3: The block size of unprocessed data at which a blocked algorithm should
+//	   crossover to an unblocked version.
+//	4: The number of shifts.
+//	5: The minimum column dimension for blocking to be used.
+//	6: The crossover point for SVD (to use QR factorization or not).
+//	7: The number of processors.
+//	8: The crossover point for multi-shift in QR and QZ methods for non-symmetric eigenvalue problems.
+//	9: Maximum size of the subproblems in divide-and-conquer algorithms.
+//	10: ieee infinity and NaN arithmetic can be trusted not to trap.
+//	11: ieee infinity arithmetic can be trusted not to trap.
+//	12...16: parameters for Dhseqr and related functions. See Iparmq for more
+//	information.
+//
+// ilaenv is an internal routine. It is exported for testing purposes.
+fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) int {
+	// TODO(btracey): Replace this with a constant lookup? A list of constants?
+ sname := name[0] == `S` || name[0] == `D` + cname := name[0] == `C` || name[0] == `Z` + if !sname && !cname { + panic(bad_name) + } + + c2 := name[1..3] + c3 := name[3..6] + c4 := c3[1..3] + + match ispec { + 1 { + match c2 { + 'GE' { + match c3 { + 'TRF', 'TRI' { + return 64 + } + 'QRF', 'RQF', 'LQF', 'QLF', 'HRD', 'BRD' { + return 32 + } + else { + panic(bad_name) + } + } + } + 'PO' { + match c3 { + 'TRF' { + return 64 + } + else { + panic(bad_name) + } + } + } + 'SY', 'HE' { + match c3 { + 'TRF' { + return 64 + } + 'TRD', 'GST' { + return 32 + } + else { + panic(bad_name) + } + } + } + 'OR', 'UN' { + match c3[0] { + 'G', 'M' { + match c3[1..] { + 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { + return 32 + } + else { + panic(bad_name) + } + } + } + else { + panic(bad_name) + } + } + } + 'GB', 'PB' { + // Assuming n4 and n2 are defined elsewhere in your code + match c3 { + 'TRF' { + // Replace `n4` and `n2` with actual variables + if sname { + // if n4 <= 64 { + // return 1 + // } + return 32 + } + // if n4 <= 64 { + // return 1 + // } + return 32 + } + else { + panic(bad_name) + } + } + } + 'PT', 'TR', 'LA' { + // Additional cases as per your original logic + } + 'ST' { + if sname && c3 == 'EBZ' { + return 1 + } + panic(bad_name) + } + else { + panic(bad_name) + } + } + } + 2 { + match c2 { + 'GE' { + match c3 { + 'QRF', 'RQF', 'LQF', 'QLF', 'HRD', 'BRD', 'TRI' { + if sname { + return 2 + } + return 2 + } + else { + panic(bad_name) + } + } + } + 'SY' { + match c3 { + 'TRF' { + if sname { + return 8 + } + return 8 + } + 'TRD' { + if sname { + return 2 + } + panic(bad_name) + } + else { + panic(bad_name) + } + } + } + 'HE' { + if c3 == 'TRD' { + return 2 + } + panic(bad_name) + } + 'OR', 'UN' { + if !sname { + panic(bad_name) + } + match c3[0] { + 'G', 'M' { + match c4 { + 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { + return 2 + } + else { + panic(bad_name) + } + } + } + else { + panic(bad_name) + } + } + } + else { + panic(bad_name) + } + } + } + 3 { + match c2 { + 
'GE' { + match c3 { + 'QRF', 'RQF', 'LQF', 'QLF', 'HRD', 'BRD' { + if sname { + return 128 + } + return 128 + } + else { + panic(bad_name) + } + } + } + 'SY', 'HE' { + if c3 == 'TRD' { + return 32 + } + panic(bad_name) + } + 'OR', 'UN' { + match c3[0] { + 'G' { + match c4 { + 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { + return 128 + } + else { + panic(bad_name) + } + } + } + else { + panic(bad_name) + } + } + } + else { + panic(bad_name) + } + } + } + 4 { + // Used by xHSEQR + return 6 + } + 5 { + // Not used + return 2 + } + 6 { + // Used by xGELSS and xGESVD + // Assuming n1 and n2 are defined elsewhere in your code + // Replace `min(n1, n2)` with actual min calculation or function + return int(f64(min(n1, n2)) * 1.6) + } + 7 { + // Not used + return 1 + } + 8 { + // Used by xHSEQR + return 50 + } + 9 { + // Used by xGELSD and xGESDD + return 25 + } + 10, 11 { + // Go guarantees ieee + return 1 + } + 12, 13, 14, 15, 16 { + // dhseqr and related functions for eigenvalue problems. + return iparmq(ispec, name, opts, n1, n2, n3, n4) + } + else { + panic(bad_ispec) + } + } + return 0 +} From 88776da450e83f4393e26e5f41a69c2f222f1042 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 28 Apr 2024 02:35:53 -0300 Subject: [PATCH 10/33] Refactor dgetrf function to use blocked algorithm --- ...ck_lapacke copy.v => cflags_d_vsl_lapack_lapacke.v} | 0 lapack/lapack64/dgetrf.v | 10 +++++++++- ml/knn.v | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) rename lapack/{cflags_d_vsl_lapack_lapacke copy.v => cflags_d_vsl_lapack_lapacke.v} (100%) diff --git a/lapack/cflags_d_vsl_lapack_lapacke copy.v b/lapack/cflags_d_vsl_lapack_lapacke.v similarity index 100% rename from lapack/cflags_d_vsl_lapack_lapacke copy.v rename to lapack/cflags_d_vsl_lapack_lapacke.v diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 9a1a10a25..89aee7a63 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -73,7 +73,15 @@ pub fn dgetrf(m int, n int, mut a 
[]f64, lda int, ipiv []int) { // apply interchanges to columns 1..j-1. mut slice := unsafe { a[j + jb..] } dlaswp(j, mut slice, lda, j, j + jb, ipiv[..j + jb], 1) - // + + blas.dtstrf(.left, .lower, .notrans, .unit, jb, n - j - jb, 1, a[j * lda + j..], + lda, a[j * lda + j + jb..], lda) + + if j + jb < m { + blas.dgemm(.notrans, .notrans, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], + lda, a[j * lda + j + jb..], lda, 1, a[(j + jb) * lda + j + jb..], + lda) + } } } } diff --git a/ml/knn.v b/ml/knn.v index 407cbfec0..c84db9397 100644 --- a/ml/knn.v +++ b/ml/knn.v @@ -112,7 +112,7 @@ pub struct PredictConfig { pub: max_iter int k int - to_pred []f64 + to_pred []f64 } // predict will find the `k` points nearest to the specified `to_pred`. From b84f2ba3e49c2d182114c44323094c0898c2c6ca Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 28 Apr 2024 02:48:28 -0300 Subject: [PATCH 11/33] Refactor dgetrf function to use blocked algorithm and fix variable naming --- lapack/lapack64/dgetrf.v | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 89aee7a63..0bacc4a85 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -71,16 +71,17 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { if j + jb < n { // apply interchanges to columns 1..j-1. - mut slice := unsafe { a[j + jb..] } - dlaswp(j, mut slice, lda, j, j + jb, ipiv[..j + jb], 1) + mut slice1 := unsafe { a[j + jb..] } + dlaswp(j, mut slice1, lda, j, j + jb, ipiv[..j + jb], 1) + mut slice2 := unsafe { a[j * lda + j + jb..] } blas.dtstrf(.left, .lower, .notrans, .unit, jb, n - j - jb, 1, a[j * lda + j..], - lda, a[j * lda + j + jb..], lda) + lda, mut slice2, lda) if j + jb < m { + mut slice3 := unsafe { a[(j + jb) * lda + j + jb..] 
} blas.dgemm(.notrans, .notrans, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], - lda, a[j * lda + j + jb..], lda, 1, a[(j + jb) * lda + j + jb..], - lda) + lda, a[j * lda + j + jb..], lda, 1, mut slice3, lda) } } } From ab34e62cbd8680190154379bf8875b6c72180497 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 28 Apr 2024 03:11:21 -0300 Subject: [PATCH 12/33] Refactor LAPACK functions to use row-major memory layout --- ..._common.v => lapack_d_vsl_lapack_common.v} | 0 lapack/lapack_notd_vsl_lapack_common.v | 184 ++++++++++++++++++ 2 files changed, 184 insertions(+) rename lapack/{lapack_common.v => lapack_d_vsl_lapack_common.v} (100%) create mode 100644 lapack/lapack_notd_vsl_lapack_common.v diff --git a/lapack/lapack_common.v b/lapack/lapack_d_vsl_lapack_common.v similarity index 100% rename from lapack/lapack_common.v rename to lapack/lapack_d_vsl_lapack_common.v diff --git a/lapack/lapack_notd_vsl_lapack_common.v b/lapack/lapack_notd_vsl_lapack_common.v new file mode 100644 index 000000000..3635ed6cd --- /dev/null +++ b/lapack/lapack_notd_vsl_lapack_common.v @@ -0,0 +1,184 @@ +module lapack + +import vsl.errors +import vsl.blas +import vsl.lapack.lapack64 + +fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int + +fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipiv &int) int + +fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, up u32, n int, a &f64, lda int) int + +fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int + +fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int + +fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int + +fn 
C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int + +// dgesv computes the solution to a real system of linear equations. +// +// See: http://www.netlib.org/lapack/explore-html/d8/d72/dgesv_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-gesv +// +// The system is: +// +// A * X = B, +// +// where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +// +// The LU decomposition with partial pivoting and row interchanges is +// used to factor A as +// +// A = P * L * U, +// +// where P is a permutation matrix, L is unit lower triangular, and U is +// upper triangular. The factored form of A is then used to solve the +// system of equations A * X = B. +// +// NOTE: matrix 'a' will be modified +@[inline] +pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { + lapack64.dgesv(n, nrhs, mut a, lda, ipiv, mut b, ldb) +} + +// dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. +// +// See: http://www.netlib.org/lapack/explore-html/d8/d2d/dgesvd_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-gesvd +// +// The SVD is written +// +// A = U * SIGMA * transpose(V) +// +// where SIGMA is an M-by-N matrix which is zero except for its +// min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +// V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +// are the singular values of A; they are real and non-negative, and +// are returned in descending order. The first min(m,n) columns of +// U and V are the left and right singular vectors of A. +// +// Note that the routine returns V**T, not V. 
+// +// NOTE: matrix 'a' will be modified +pub fn dgesvd(jobu &char, jobvt &char, m int, n int, a []f64, lda int, s []f64, u []f64, ldu int, vt []f64, ldvt int, superb []f64) { + info := C.LAPACKE_dgesvd(.row_major, jobu, jobvt, m, n, &a[0], lda, &s[0], &u[0], + ldu, &vt[0], ldvt, &superb[0]) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) + } +} + +// dgetrf computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. +// +// See: http://www.netlib.org/lapack/explore-html/d3/d6a/dgetrf_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-getrf +// +// The factorization has the form +// A = P * L * U +// where P is a permutation matrix, L is lower triangular with unit +// diagonal elements (lower trapezoidal if m > n), and U is upper +// triangular (upper trapezoidal if m < n). +// +// This is the right-looking Level 3 BLAS version of the algorithm. +// +// NOTE: (1) matrix 'a' will be modified +// (2) ipiv indices are 1-based (i.e. Fortran) +pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { + lapack64.dgetrf(m, n, mut a, lda, ipiv) +} + +// dgetri computes the inverse of a matrix using the LU factorization computed by DGETRF. +// +// See: http://www.netlib.org/lapack/explore-html/df/da4/dgetri_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-getri +// +// This method inverts U and then computes inv(A) by solving the system +// inv(A)*L = inv(U) for inv(A). +pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { + unsafe { + info := C.LAPACKE_dgetri(.row_major, n, &a[0], lda, &ipiv[0]) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) + } + } +} + +// dpotrf computes the Cholesky factorization of a real symmetric positive definite matrix A. 
+// +// See: http://www.netlib.org/lapack/explore-html/d0/d8a/dpotrf_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-potrf +// +// The factorization has the form +// +// A = U**T * U, if UPLO = 'U' +// +// or +// +// A = L * L**T, if UPLO = 'L' +// +// where U is an upper triangular matrix and L is lower triangular. +// +// This is the block version of the algorithm, calling Level 3 BLAS. +pub fn dpotrf(up bool, n int, mut a []f64, lda int) { + unsafe { + info := C.LAPACKE_dpotrf(.row_major, blas.l_uplo(up), n, &a[0], lda) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) + } + } +} + +// dgeev computes for an N-by-N real nonsymmetric matrix A, the +// eigenvalues and, optionally, the left and/or right eigenvectors. +// +// See: http://www.netlib.org/lapack/explore-html/d9/d28/dgeev_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-geev +// +// See: https://www.nag.co.uk/numeric/fl/nagdoc_fl26/html/f08/f08naf.html +// +// The right eigenvector v(j) of A satisfies +// +// A * v(j) = lambda(j) * v(j) +// +// where lambda(j) is its eigenvalue. +// +// The left eigenvector u(j) of A satisfies +// +// u(j)**H * A = lambda(j) * u(j)**H +// +// where u(j)**H denotes the conjugate-transpose of u(j). +// +// The computed eigenvectors are normalized to have Euclidean norm +// equal to 1 and largest component real. 
+pub fn dgeev(calc_vl bool, calc_vr bool, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { + mut vvl := 0.0 + mut vvr := 0.0 + mut ldvl := ldvl_ + mut ldvr := ldvr_ + if calc_vl { + vvl = vl[0] + } else { + ldvl = 1 + } + if calc_vr { + vvr = vr[0] + } else { + ldvr = 1 + } + unsafe { + info := C.LAPACKE_dgeev(.row_major, &char(blas.job_vlr(calc_vl).str().str), &char(blas.job_vlr(calc_vr).str().str), + n, &a[0], lda, &wr[0], &wi[0], &vvl, ldvl, &vvr, ldvr) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) + } + } +} From 14d3f672380826f5232ec2d5d2aab5f0839f271a Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 28 Apr 2024 03:14:07 -0300 Subject: [PATCH 13/33] Refactor LAPACK module to use lapack64 module --- lapack/lapack64/dgesv.v | 2 +- lapack/lapack64/dgetrf.v | 2 +- lapack/lapack64/dgetrs.v | 2 +- lapack/lapack64/errors.v | 2 +- lapack/lapack64/ilaenv.v | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lapack/lapack64/dgesv.v b/lapack/lapack64/dgesv.v index 03bbb93d3..d0d7f90a5 100644 --- a/lapack/lapack64/dgesv.v +++ b/lapack/lapack64/dgesv.v @@ -1,4 +1,4 @@ -module lapack +module lapack64 import math import vsl.blas diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 0bacc4a85..e1b6553fc 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -1,4 +1,4 @@ -module lapack +module lapack64 import math import vsl.blas diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v index e26b661c8..71d219723 100644 --- a/lapack/lapack64/dgetrs.v +++ b/lapack/lapack64/dgetrs.v @@ -1,4 +1,4 @@ -module lapack +module lapack64 import math import vsl.blas diff --git a/lapack/lapack64/errors.v b/lapack/lapack64/errors.v index d17defc08..91a96995f 100644 --- a/lapack/lapack64/errors.v +++ b/lapack/lapack64/errors.v @@ -1,4 +1,4 @@ -module lapack +module lapack64 // This list is duplicated in netlib/lapack/netlib. Keep in sync. 
diff --git a/lapack/lapack64/ilaenv.v b/lapack/lapack64/ilaenv.v index cd075a0b8..52d2e4e7d 100644 --- a/lapack/lapack64/ilaenv.v +++ b/lapack/lapack64/ilaenv.v @@ -72,7 +72,7 @@ fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) i } 'OR', 'UN' { match c3[0] { - 'G', 'M' { + `G`, `M` { match c3[1..] { 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { return 32 @@ -167,7 +167,7 @@ fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) i panic(bad_name) } match c3[0] { - 'G', 'M' { + `G`, `M` { match c4 { 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { return 2 @@ -210,7 +210,7 @@ fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) i } 'OR', 'UN' { match c3[0] { - 'G' { + `G` { match c4 { 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { return 128 From 02da1671ed19780d29c5e1f80441091d852687e9 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 28 Apr 2024 03:18:30 -0300 Subject: [PATCH 14/33] Refactor dgetrs function to use f64 instead of float64 for array types --- lapack/lapack64/dgetrs.v | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v index 71d219723..0458cc1e2 100644 --- a/lapack/lapack64/dgetrs.v +++ b/lapack/lapack64/dgetrs.v @@ -16,7 +16,7 @@ import vsl.blas // // a and ipiv contain the LU factorization of A and the permutation indices as // computed by Dgetrf. ipiv is zero-indexed. 
-pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []float64, lda int, ipiv []int, mut b []float64, ldb int) { +pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { if trans != .no_trans && trans != .trans && trans != .conj_trans { panic(bad_trans) } From e43bfa574a4f026c01d8106d9824a43c5e4e1eae Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 01:41:06 -0300 Subject: [PATCH 15/33] refactor: Update create_image_2d function to use local variable for format --- blas/oblas_d_vsl_blas_cblas.v | 648 +++++++++++++++++- la/matrix_ops.v | 2 +- lapack/conversions.v | 70 ++ lapack/lapack64/conversions.v | 199 ++++++ lapack/lapack64/dgebal.v | 33 + lapack/lapack64/dgeev.v | 39 ++ lapack/lapack64/dgehrd.v | 35 + lapack/lapack64/dgesv.v | 4 +- lapack/lapack64/dgesvd.v | 42 ++ lapack/lapack64/dgetrf.v | 11 +- lapack/lapack64/dgetri.v | 31 + lapack/lapack64/dgetrs.v | 9 +- lapack/lapack64/dpotrf.v | 33 + lapack/lapack64/dsyev.v | 38 + lapack/lapack64/ilaenv.v | 4 +- ...common.v => lapack_d_vsl_lapack_lapacke.v} | 30 +- ...mon.v => lapack_notd_vsl_lapack_lapacke.v} | 56 +- 17 files changed, 1201 insertions(+), 83 deletions(-) create mode 100644 lapack/conversions.v create mode 100644 lapack/lapack64/conversions.v create mode 100644 lapack/lapack64/dgebal.v create mode 100644 lapack/lapack64/dgeev.v create mode 100644 lapack/lapack64/dgehrd.v create mode 100644 lapack/lapack64/dgesvd.v create mode 100644 lapack/lapack64/dgetri.v create mode 100644 lapack/lapack64/dpotrf.v create mode 100644 lapack/lapack64/dsyev.v rename lapack/{lapack_d_vsl_lapack_common.v => lapack_d_vsl_lapack_lapacke.v} (78%) rename lapack/{lapack_notd_vsl_lapack_common.v => lapack_notd_vsl_lapack_lapacke.v} (70%) diff --git a/blas/oblas_d_vsl_blas_cblas.v b/blas/oblas_d_vsl_blas_cblas.v index 29be3be06..777038f3d 100644 --- a/blas/oblas_d_vsl_blas_cblas.v +++ b/blas/oblas_d_vsl_blas_cblas.v @@ -221,6 +221,16 @@ pub fn 
dasum(n int, x []f64, incx int) f64 { return C.cblas_dasum(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn scasum(n int, x voidptr, incx int) f32 { + return C.cblas_scasum(n, x, incx) +} + +@[inline] +pub fn dzasum(n int, x voidptr, incx int) f64 { + return C.cblas_dzasum(n, x, incx) +} + @[inline] pub fn ssum(n int, x []f32, incx int) f32 { return C.cblas_ssum(n, unsafe { &x[0] }, incx) @@ -241,6 +251,16 @@ pub fn dnrm2(n int, x []f64, incx int) f64 { return C.cblas_dnrm2(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn scnrm2(n int, x voidptr, incx int) f32 { + return C.cblas_scnrm2(n, x, incx) +} + +@[inline] +pub fn dznrm2(n int, x voidptr, incx int) f64 { + return C.cblas_dznrm2(n, x, incx) +} + @[inline] pub fn isamax(n int, x []f32, incx int) int { return C.cblas_isamax(n, unsafe { &x[0] }, incx) @@ -251,16 +271,36 @@ pub fn idamax(n int, x []f64, incx int) int { return C.cblas_idamax(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn icamax(n int, x voidptr, incx int) int { + return C.cblas_icamax(n, x, incx) +} + +@[inline] +pub fn izamax(n int, x voidptr, incx int) int { + return C.cblas_izamax(n, x, incx) +} + @[inline] pub fn isamin(n int, x []f32, incx int) int { return C.cblas_isamin(n, unsafe { &x[0] }, incx) } @[inline] -pub fn idamin(n int, x &f64, incx int) int { +pub fn idamin(n int, x []f64, incx int) int { return C.cblas_idamin(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn icamin(n int, x voidptr, incx int) int { + return C.cblas_icamin(n, x, incx) +} + +@[inline] +pub fn izamin(n int, x voidptr, incx int) int { + return C.cblas_izamin(n, x, incx) +} + @[inline] pub fn ismax(n int, x []f32, incx int) int { return C.cblas_ismax(n, unsafe { &x[0] }, incx) @@ -271,6 +311,16 @@ pub fn idmax(n int, x []f64, incx int) int { return C.cblas_idmax(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn icmax(n int, x voidptr, incx int) int { + return C.cblas_icmax(n, x, incx) +} + +@[inline] +pub fn izmax(n int, x voidptr, incx int) int { + return 
C.cblas_izmax(n, x, incx) +} + @[inline] pub fn ismin(n int, x []f32, incx int) int { return C.cblas_ismin(n, unsafe { &x[0] }, incx) @@ -281,6 +331,16 @@ pub fn idmin(n int, x []f64, incx int) int { return C.cblas_idmin(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn icmin(n int, x voidptr, incx int) int { + return C.cblas_icmin(n, x, incx) +} + +@[inline] +pub fn izmin(n int, x voidptr, incx int) int { + return C.cblas_izmin(n, x, incx) +} + @[inline] pub fn saxpy(n int, alpha f32, x []f32, incx int, mut y []f32, incy int) { C.cblas_saxpy(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) @@ -292,15 +352,35 @@ pub fn daxpy(n int, alpha f64, x []f64, incx int, mut y []f64, incy int) { } @[inline] -pub fn scopy(n int, mut x []f32, incx int, mut y []f32, incy int) { +pub fn caxpy(n int, alpha voidptr, x voidptr, incx int, mut y voidptr, incy int) { + C.cblas_caxpy(n, alpha, x, incx, y, incy) +} + +@[inline] +pub fn zaxpy(n int, alpha voidptr, x voidptr, incx int, mut y voidptr, incy int) { + C.cblas_zaxpy(n, alpha, x, incx, y, incy) +} + +@[inline] +pub fn scopy(n int, x []f32, incx int, mut y []f32, incy int) { C.cblas_scopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) } @[inline] -pub fn dcopy(n int, mut x []f64, incx int, mut y []f64, incy int) { +pub fn dcopy(n int, x []f64, incx int, mut y []f64, incy int) { C.cblas_dcopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) } +@[inline] +pub fn ccopy(n int, x voidptr, incx int, mut y voidptr, incy int) { + C.cblas_ccopy(n, x, incx, y, incy) +} + +@[inline] +pub fn zcopy(n int, x voidptr, incx int, mut y voidptr, incy int) { + C.cblas_zcopy(n, x, incx, y, incy) +} + @[inline] pub fn sswap(n int, mut x []f32, incx int, mut y []f32, incy int) { C.cblas_sswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) @@ -311,6 +391,16 @@ pub fn dswap(n int, mut x []f64, incx int, mut y []f64, incy int) { C.cblas_dswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) } +@[inline] +pub fn cswap(n int, x voidptr, 
incx int, y voidptr, incy int) { + C.cblas_cswap(n, x, incx, y, incy) +} + +@[inline] +pub fn zswap(n int, x voidptr, incx int, y voidptr, incy int) { + C.cblas_zswap(n, x, incx, y, incy) +} + @[inline] pub fn srot(n int, mut x []f32, incx int, mut y []f32, incy int, c f32, s f32) { C.cblas_srot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, c, s) @@ -332,22 +422,22 @@ pub fn drotg(a f64, b f64, c f64, s f64) { } @[inline] -pub fn srotm(n int, x []f32, incx int, y []f32, incy int, p []f32) { +pub fn srotm(n int, mut x []f32, incx int, mut y []f32, incy int, p []f32) { C.cblas_srotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) } @[inline] -pub fn drotm(n int, x []f64, incx int, y []f64, incy int, p []f64) { +pub fn drotm(n int, mut x []f64, incx int, mut y []f64, incy int, p []f64) { C.cblas_drotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) } @[inline] -pub fn srotmg(d1 f32, d2 f32, b1 f32, b2 f32, p []f32) { +pub fn srotmg(d1 f32, d2 f32, b1 f32, b2 f32, mut p []f32) { C.cblas_srotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) } @[inline] -pub fn drotmg(d1 f64, d2 f64, b1 f64, b2 f32, p []f64) { +pub fn drotmg(d1 f64, d2 f64, b1 f64, b2 f32, mut p []f64) { C.cblas_drotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) } @@ -361,6 +451,26 @@ pub fn dscal(n int, alpha f64, mut x []f64, incx int) { C.cblas_dscal(n, alpha, unsafe { &x[0] }, incx) } +@[inline] +pub fn cscal(n int, alpha voidptr, mut x voidptr, incx int) { + C.cblas_cscal(n, alpha, x, incx) +} + +@[inline] +pub fn zscal(n int, alpha voidptr, mut x voidptr, incx int) { + C.cblas_zscal(n, alpha, x, incx) +} + +@[inline] +pub fn csscal(n int, alpha f32, mut x voidptr, incx int) { + C.cblas_csscal(n, alpha, x, incx) +} + +@[inline] +pub fn zdscal(n int, alpha f64, mut x voidptr, incx int) { + C.cblas_zdscal(n, alpha, x, incx) +} + @[inline] pub fn sgemv(trans bool, m int, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { 
C.cblas_sgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, @@ -373,6 +483,16 @@ pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, inc incx, beta, unsafe { &y[0] }, incy) } +@[inline] +pub fn cgemv(trans bool, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_cgemv(.row_major, c_trans(trans), m, n, alpha, a, lda, x, incx, beta, y, incy) +} + +@[inline] +pub fn zgemv(trans bool, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zgemv(.row_major, c_trans(trans), m, n, alpha, a, lda, x, incx, beta, y, incy) +} + @[inline] pub fn sger(m int, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { C.cblas_sger(.row_major, m, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, @@ -386,29 +506,69 @@ pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a } @[inline] -pub fn strsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, +pub fn cgeru(m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_cgeru(.row_major, m, n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] +pub fn cgerc(m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_cgerc(.row_major, m, n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] +pub fn zgeru(m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_zgeru(.row_major, m, n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] +pub fn zgerc(m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_zgerc(.row_major, m, n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] 
+pub fn strsv(uplo bool, trans bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, +pub fn dtrsv(uplo bool, trans bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn strmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, +pub fn ctrsv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +} + +@[inline] +pub fn ztrsv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +} + +@[inline] +pub fn strmv(uplo bool, trans bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, +pub fn dtrmv(uplo bool, trans bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } +@[inline] +pub fn ctrmv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx 
int) { + C.cblas_ctrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +} + +@[inline] +pub fn ztrmv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +} + @[inline] pub fn ssyr(uplo bool, n int, alpha f32, x []f32, incx int, mut a []f32, lda int) { C.cblas_ssyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, @@ -421,6 +581,16 @@ pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int lda) } +@[inline] +pub fn cher(uplo bool, n int, alpha f32, x voidptr, incx int, mut a voidptr, lda int) { + C.cblas_cher(.row_major, c_uplo(uplo), n, alpha, x, incx, a, lda) +} + +@[inline] +pub fn zher(uplo bool, n int, alpha f64, x voidptr, incx int, mut a voidptr, lda int) { + C.cblas_zher(.row_major, c_uplo(uplo), n, alpha, x, incx, a, lda) +} + @[inline] pub fn ssyr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { C.cblas_ssyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, @@ -434,9 +604,451 @@ pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, } @[inline] -pub fn sgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut cc []f32, ldc int) { - C.cblas_sgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) +pub fn cher2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_cher2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] +pub fn zher2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_zher2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] +pub fn 
sgbmv(trans bool, m int, n int, kl int, ku int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, unsafe { &a[0] }, lda, + unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dgbmv(trans bool, m int, n int, kl int, ku int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, unsafe { &a[0] }, lda, + unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn cgbmv(trans bool, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_cgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, + y, incy) +} + +@[inline] +pub fn zgbmv(trans bool, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, + y, incy) +} + +@[inline] +pub fn ssbmv(uplo bool, n int, k int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_ssbmv(.row_major, c_uplo(uplo), n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dsbmv(uplo bool, n int, k int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dsbmv(.row_major, c_uplo(uplo), n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn stbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_stbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dtbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f64, lda int, 
mut x []f64, incx int) { + C.cblas_dtbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ctbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +} + +@[inline] +pub fn ztbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +} + +@[inline] +pub fn stbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_stbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dtbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ctbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +} + +@[inline] +pub fn ztbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +} + +@[inline] +pub fn stpmv(uplo bool, trans bool, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { + C.cblas_stpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, + unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dtpmv(uplo bool, trans bool, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + C.cblas_dtpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, + unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ctpmv(uplo bool, trans bool, diag 
Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ctpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +} + +@[inline] +pub fn ztpmv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ztpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +} + +@[inline] +pub fn stpsv(uplo bool, trans bool, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { + C.cblas_stpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, + unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dtpsv(uplo bool, trans bool, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + C.cblas_dtpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, + unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ctpsv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ctpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +} + +@[inline] +pub fn ztpsv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ztpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +} + +@[inline] +pub fn ssymv(uplo bool, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_ssymv(.row_major, c_uplo(uplo), n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dsymv(uplo bool, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dsymv(.row_major, c_uplo(uplo), n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn chemv(uplo bool, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chemv(.row_major, c_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy) +} + +@[inline] +pub fn zhemv(uplo bool, n int, alpha voidptr, a voidptr, lda int, x 
voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhemv(.row_major, c_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy) +} + +@[inline] +pub fn sspmv(uplo bool, n int, alpha f32, ap []f32, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sspmv(.row_major, c_uplo(uplo), n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dspmv(uplo bool, n int, alpha f64, ap []f64, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dspmv(.row_major, c_uplo(uplo), n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn sspr(uplo bool, n int, alpha f32, x []f32, incx int, mut ap []f32) { + C.cblas_sspr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &ap[0] }) +} + +@[inline] +pub fn dspr(uplo bool, n int, alpha f64, x []f64, incx int, mut ap []f64) { + C.cblas_dspr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &ap[0] }) +} + +@[inline] +pub fn chpr(uplo bool, n int, alpha f32, x voidptr, incx int, mut a voidptr) { + C.cblas_chpr(.row_major, c_uplo(uplo), n, alpha, x, incx, a) +} + +@[inline] +pub fn zhpr(uplo bool, n int, alpha f64, x voidptr, incx int, mut a voidptr) { + C.cblas_zhpr(.row_major, c_uplo(uplo), n, alpha, x, incx, a) +} + +@[inline] +pub fn sspr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32) { + C.cblas_sspr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, + incy, unsafe { &a[0] }) +} + +@[inline] +pub fn dspr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64) { + C.cblas_dspr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, + incy, unsafe { &a[0] }) +} + +@[inline] +pub fn chpr2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut ap voidptr) { + C.cblas_chpr2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, ap) +} 
+ +@[inline] +pub fn zhpr2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut ap voidptr) { + C.cblas_zhpr2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, ap) +} + +@[inline] +pub fn chbmv(uplo bool, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chbmv(.row_major, c_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy) +} + +@[inline] +pub fn zhbmv(uplo bool, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhbmv(.row_major, c_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy) +} + +@[inline] +pub fn chpmv(uplo bool, n int, alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chpmv(.row_major, c_uplo(uplo), n, alpha, ap, x, incx, beta, y, incy) +} + +@[inline] +pub fn zhpmv(uplo bool, n int, alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhpmv(.row_major, c_uplo(uplo), n, alpha, ap, x, incx, beta, y, incy) +} + +@[inline] +pub fn ssyrk(uplo bool, trans bool, n int, k int, alpha f32, a []f32, lda int, beta f32, mut c []f32, ldc int) { + C.cblas_ssyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, + lda, beta, unsafe { &c[0] }, ldc) +} + +@[inline] +pub fn dsyrk(uplo bool, trans bool, n int, k int, alpha f64, a []f64, lda int, beta f64, mut c []f64, ldc int) { + C.cblas_dsyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, + lda, beta, unsafe { &c[0] }, ldc) +} + +@[inline] +pub fn csyrk(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_csyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, + c, ldc) +} + +@[inline] +pub fn zsyrk(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { + 
C.cblas_zsyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, + c, ldc) +} + +@[inline] +pub fn ssyr2k(uplo bool, trans bool, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut c []f32, ldc int) { + C.cblas_ssyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb, beta, unsafe { &c[0] }, ldc) +} + +@[inline] +pub fn dsyr2k(uplo bool, trans bool, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut c []f64, ldc int) { + C.cblas_dsyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb, beta, unsafe { &c[0] }, ldc) +} + +@[inline] +pub fn csyr2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_csyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, + beta, c, ldc) +} + +@[inline] +pub fn zsyr2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_zsyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, + beta, c, ldc) +} + +@[inline] +pub fn strmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_strmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn dtrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_dtrmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn ctrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ctrmm(.row_major, side, c_uplo(uplo), c_trans(trans), 
diag, m, n, alpha, a, + lda, b, ldb) +} + +@[inline] +pub fn ztrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ztrmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, + lda, b, ldb) +} + +@[inline] +pub fn strsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_strsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn dtrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_dtrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn ctrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ctrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, + lda, b, ldb) +} + +@[inline] +pub fn ztrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ztrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, + lda, b, ldb) +} + +@[inline] +pub fn chemm(side Side, uplo bool, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_chemm(.row_major, side, c_uplo(uplo), m, n, alpha, a, lda, b, ldb, beta, c, + ldc) +} + +@[inline] +pub fn zhemm(side Side, uplo bool, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_zhemm(.row_major, side, c_uplo(uplo), m, n, alpha, a, lda, b, ldb, beta, c, + ldc) +} + +@[inline] +pub fn cherk(uplo bool, trans bool, n int, k int, alpha f32, a voidptr, lda int, beta f32, mut c voidptr, ldc int) { + 
C.cblas_cherk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, + c, ldc) +} + +@[inline] +pub fn zherk(uplo bool, trans bool, n int, k int, alpha f64, a voidptr, lda int, beta f64, mut c voidptr, ldc int) { + C.cblas_zherk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, + c, ldc) +} + +@[inline] +pub fn cher2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f32, mut c voidptr, ldc int) { + C.cblas_cher2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, + beta, c, ldc) +} + +@[inline] +pub fn zher2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f64, mut c voidptr, ldc int) { + C.cblas_zher2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, + beta, c, ldc) +} + +@[inline] +pub fn saxpby(n int, alpha f32, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_saxpby(n, alpha, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn daxpby(n int, alpha f64, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_daxpby(n, alpha, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn caxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_caxpby(n, alpha, x, incx, beta, y, incy) +} + +@[inline] +pub fn zaxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zaxpby(n, alpha, x, incx, beta, y, incy) +} + +@[inline] +pub fn somatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_somatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, + unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn domatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_domatcopy(order, c_trans(trans), rows, cols, alpha, 
unsafe { &a[0] }, lda, + unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn comatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f32, a &f32, lda int, mut b &f32, ldb int) { + C.cblas_comatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, b, ldb) +} + +@[inline] +pub fn zomatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f64, a &f64, lda int, mut b &f64, ldb int) { + C.cblas_zomatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, b, ldb) +} + +@[inline] +pub fn simatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f32, mut a []f32, lda int, ldb int) { + C.cblas_simatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, + ldb) +} + +@[inline] +pub fn dimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f64, mut a []f64, lda int, ldb int) { + C.cblas_dimatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, + ldb) +} + +@[inline] +pub fn cimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f32, mut a &f32, lda int, ldb int) { + C.cblas_cimatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, ldb) +} + +@[inline] +pub fn zimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f64, mut a &f64, lda int, ldb int) { + C.cblas_zimatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, ldb) +} + +@[inline] +pub fn sgeadd(order MemoryLayout, rows int, cols int, alpha f32, a []f32, lda int, beta f32, mut c []f32, ldc int) { + C.cblas_sgeadd(order, rows, cols, alpha, unsafe { &a[0] }, lda, beta, unsafe { &c[0] }, + ldc) +} + +@[inline] +pub fn dgeadd(order MemoryLayout, rows int, cols int, alpha f64, a []f64, lda int, beta f64, mut c []f64, ldc int) { + C.cblas_dgeadd(order, rows, cols, alpha, unsafe { &a[0] }, lda, beta, unsafe { &c[0] }, + ldc) +} + +@[inline] +pub fn cgeadd(order MemoryLayout, rows int, cols int, alpha &f32, a &f32, lda int, beta &f32, mut c &f32, ldc int) { + C.cblas_cgeadd(order, rows, cols, alpha, a, lda, beta, c, ldc) +} + 
+@[inline] +pub fn zgeadd(order MemoryLayout, rows int, cols int, alpha &f64, a &f64, lda int, beta &f64, mut c &f64, ldc int) { + C.cblas_zgeadd(order, rows, cols, alpha, a, lda, beta, c, ldc) } @[inline] diff --git a/la/matrix_ops.v b/la/matrix_ops.v index 74dc6ea28..a792aeede 100644 --- a/la/matrix_ops.v +++ b/la/matrix_ops.v @@ -90,7 +90,7 @@ pub fn matrix_svd(mut s []f64, mut u Matrix[f64], mut vt Matrix[f64], mut a Matr if copy_a { acpy = a.clone() } - lapack.dgesvd(&char('A'.str), &char('A'.str), a.m, a.n, acpy.data, 1, s, u.data, a.m, + lapack.dgesvd(.svd_all, .svd_all, a.m, a.n, mut acpy.data, 1, s, mut u.data, a.m, mut vt.data, a.n, superb) } diff --git a/lapack/conversions.v b/lapack/conversions.v new file mode 100644 index 000000000..fcb1cb239 --- /dev/null +++ b/lapack/conversions.v @@ -0,0 +1,70 @@ +module lapack + +import vsl.lapack.lapack64 + +// Direct specifies the direction of the multiplication for the Householder matrix. +pub type Direct = lapack64.Direct + +// Sort is the sorting order. +pub type Sort = lapack64.Sort + +// StoreV indicates the storage direction of elementary reflectors. +pub type StoreV = lapack64.StoreV + +// MatrixNorm represents the kind of matrix norm to compute. +pub type MatrixNorm = lapack64.MatrixNorm + +// MatrixType represents the kind of matrix represented in the data. +pub type MatrixType = lapack64.MatrixType + +// Pivot specifies the pivot type for plane rotations. +pub type Pivot = lapack64.Pivot + +// ApplyOrtho specifies which orthogonal matrix is applied in Dormbr. +pub type ApplyOrtho = lapack64.ApplyOrtho + +// GenOrtho specifies which orthogonal matrix is generated in Dorgbr. +pub type GenOrtho = lapack64.GenOrtho + +// SVDJob specifies the singular vector computation type for SVD. +pub type SVDJob = lapack64.SVDJob + +// GSVDJob specifies the singular vector computation type for Generalized SVD. +pub type GSVDJob = lapack64.GSVDJob + +// EVComp specifies how eigenvectors are computed in Dsteqr. 
+pub type EVComp = lapack64.EVComp + +// EVJob specifies whether eigenvectors are computed in Dsyev. +pub type EVJob = lapack64.EVJob + +// LeftEVJob specifies whether left eigenvectors are computed in Dgeev. +pub type LeftEVJob = lapack64.LeftEVJob + +// RightEVJob specifies whether right eigenvectors are computed in Dgeev. +pub type RightEVJob = lapack64.RightEVJob + +// BalanceJob specifies matrix balancing operation. +pub type BalanceJob = lapack64.BalanceJob + +// SchurJob specifies whether the Schur form is computed in Dhseqr. +pub type SchurJob = lapack64.SchurJob + +// SchurComp specifies whether and how the Schur vectors are computed in Dhseqr. +pub type SchurComp = lapack64.SchurComp + +// UpdateSchurComp specifies whether the matrix of Schur vectors is updated in Dtrexc. +pub type UpdateSchurComp = lapack64.UpdateSchurComp + +// EVSide specifies what eigenvectors are computed in Dtrevc3. +pub type EVSide = lapack64.EVSide + +// EVHowMany specifies which eigenvectors are computed in Dtrevc3 and how. +pub type EVHowMany = lapack64.EVHowMany + +// MaximizeNormXJob specifies the heuristic method for computing a contribution to +// the reciprocal Dif-estimate in Dlatdf. +pub type MaximizeNormXJob = lapack64.MaximizeNormXJob + +// OrthoComp specifies whether and how the orthogonal matrix is computed in Dgghrd. +pub type OrthoComp = lapack64.OrthoComp diff --git a/lapack/lapack64/conversions.v b/lapack/lapack64/conversions.v new file mode 100644 index 000000000..5052633f9 --- /dev/null +++ b/lapack/lapack64/conversions.v @@ -0,0 +1,199 @@ +module lapack64 + +// Direct specifies the direction of the multiplication for the Householder matrix. +pub enum Direct as u8 { + // Reflectors are right-multiplied, H_0 * H_1 * ... * H_{k-1}. + forward = u8(`F`) + // Reflectors are left-multiplied, H_{k-1} * ... * H_1 * H_0. + backward = u8(`B`) +} + +// Sort is the sorting order. 
+pub enum Sort as u8 { + sort_increasing = u8(`I`) + sort_decreasing = u8(`D`) +} + +// StoreV indicates the storage direction of elementary reflectors. +pub enum StoreV as u8 { + // Reflector stored in a column of the matrix. + column_wise = u8(`C`) + // Reflector stored in a row of the matrix. + row_wise = u8(`R`) +} + +// MatrixNorm represents the kind of matrix norm to compute. +pub enum MatrixNorm as u8 { + // max(abs(A(i,j))) + max_abs = u8(`M`) + // Maximum absolute column sum (one norm) + max_column_sum = u8(`O`) + // Maximum absolute row sum (infinity norm) + max_row_sum = u8(`I`) + // Frobenius norm (sqrt of sum of squares) + frobenius = u8(`F`) +} + +// MatrixType represents the kind of matrix represented in the data. +pub enum MatrixType as u8 { + // A general dense matrix. + general = u8(`G`) + // An upper triangular matrix. + upper_tri = u8(`U`) + // A lower triangular matrix. + lower_tri = u8(`L`) +} + +// Pivot specifies the pivot type for plane rotations. +pub enum Pivot as u8 { + variable = u8(`V`) + top = u8(`T`) + bottom = u8(`B`) +} + +// ApplyOrtho specifies which orthogonal matrix is applied in Dormbr. +pub enum ApplyOrtho as u8 { + // Apply P or Pᵀ. + apply_p = u8(`P`) + // Apply Q or Qᵀ. + apply_q = u8(`Q`) +} + +// GenOrtho specifies which orthogonal matrix is generated in Dorgbr. +pub enum GenOrtho as u8 { + // Generate Pᵀ. + generate_pt = u8(`P`) + // Generate Q. + generate_q = u8(`Q`) +} + +// SVDJob specifies the singular vector computation type for SVD. +pub enum SVDJob as u8 { + // Compute all columns of the orthogonal matrix U or V. + svd_all = u8(`A`) + // Compute the singular vectors and store them in the orthogonal matrix U or V. + svd_store = u8(`S`) + // Compute the singular vectors and overwrite them on the input matrix A. + svd_overwrite = u8(`O`) + // Do not compute singular vectors. + svd_none = u8(`N`) +} + +// GSVDJob specifies the singular vector computation type for Generalized SVD. 
+pub enum GSVDJob as u8 { + // Compute orthogonal matrix U. + gsvd_u = u8(`U`) + // Compute orthogonal matrix V. + gsvd_v = u8(`V`) + // Compute orthogonal matrix Q. + gsvd_q = u8(`Q`) + // Use unit-initialized matrix. + gsvd_unit = u8(`I`) + // Do not compute orthogonal matrix. + gsvd_none = u8(`N`) +} + +// EVComp specifies how eigenvectors are computed in Dsteqr. +pub enum EVComp as u8 { + // Compute eigenvectors of the original symmetric matrix. + ev_orig = u8(`V`) + // Compute eigenvectors of the tridiagonal matrix. + ev_tridiag = u8(`I`) + // Do not compute eigenvectors. + ev_comp_none = u8(`N`) +} + +// EVJob specifies whether eigenvectors are computed in Dsyev. +pub enum EVJob as u8 { + // Compute eigenvectors. + ev_compute = u8(`V`) + // Do not compute eigenvectors. + ev_none = u8(`N`) +} + +// LeftEVJob specifies whether left eigenvectors are computed in Dgeev. +pub enum LeftEVJob as u8 { + // Compute left eigenvectors. + left_ev_compute = u8(`V`) + // Do not compute left eigenvectors. + left_ev_none = u8(`N`) +} + +// RightEVJob specifies whether right eigenvectors are computed in Dgeev. +pub enum RightEVJob as u8 { + // Compute right eigenvectors. + right_ev_compute = u8(`V`) + // Do not compute right eigenvectors. + right_ev_none = u8(`N`) +} + +// BalanceJob specifies matrix balancing operation. +pub enum BalanceJob as u8 { + permute = u8(`P`) + scale = u8(`S`) + permute_scale = u8(`B`) + balance_none = u8(`N`) +} + +// SchurJob specifies whether the Schur form is computed in Dhseqr. +pub enum SchurJob as u8 { + eigenvalues_only = u8(`E`) + eigenvalues_and_schur = u8(`S`) +} + +// SchurComp specifies whether and how the Schur vectors are computed in Dhseqr. +pub enum SchurComp as u8 { + // Compute Schur vectors of the original matrix. + schur_orig = u8(`V`) + // Compute Schur vectors of the upper Hessenberg matrix. + schur_hess = u8(`I`) + // Do not compute Schur vectors. 
+ schur_none = u8(`N`) +} + +// UpdateSchurComp specifies whether the matrix of Schur vectors is updated in Dtrexc. +pub enum UpdateSchurComp as u8 { + // Update the matrix of Schur vectors. + update_schur = u8(`V`) + // Do not update the matrix of Schur vectors. + update_schur_none = u8(`N`) +} + +// EVSide specifies what eigenvectors are computed in Dtrevc3. +pub enum EVSide as u8 { + // Compute only right eigenvectors. + ev_right = u8(`R`) + // Compute only left eigenvectors. + ev_left = u8(`L`) + // Compute both right and left eigenvectors. + ev_both = u8(`B`) +} + +// EVHowMany specifies which eigenvectors are computed in Dtrevc3 and how. +pub enum EVHowMany as u8 { + // Compute all right and/or left eigenvectors. + ev_all = u8(`A`) + // Compute all right and/or left eigenvectors multiplied by an input matrix. + ev_all_mul_q = u8(`B`) + // Compute selected right and/or left eigenvectors. + ev_selected = u8(`S`) +} + +// MaximizeNormXJob specifies the heuristic method for computing a contribution to +// the reciprocal Dif-estimate in Dlatdf. +pub enum MaximizeNormXJob as u8 { + // Solve Z*x=h-f where h is a vector of ±1. + local_look_ahead = 0 + // Compute an approximate null-vector e of Z, normalize e and solve Z*x=±e-f. + normalized_null_vector = 2 +} + +// OrthoComp specifies whether and how the orthogonal matrix is computed in Dgghrd. +pub enum OrthoComp as u8 { + // Do not compute the orthogonal matrix. + ortho_none = u8(`N`) + // The orthogonal matrix is formed explicitly and returned in the argument. + ortho_explicit = u8(`I`) + // The orthogonal matrix is post-multiplied into the matrix stored in the argument on entry. + ortho_postmul = u8(`V`) +} diff --git a/lapack/lapack64/dgebal.v b/lapack/lapack64/dgebal.v new file mode 100644 index 000000000..ee7746a38 --- /dev/null +++ b/lapack/lapack64/dgebal.v @@ -0,0 +1,33 @@ +module lapack64 + +import math +import vsl.blas + +// dgebal balances a general real matrix A. 
+pub fn dgebal(job BalanceJob, n int, mut a []f64, lda int, scale []f64) int { + if n == 0 { + return 0 + } + + mut info := 0 + if job != .balance_none && job != .permute && job != .scale && job != .permute_scale { + info = -1 + } else if n < 0 { + info = -2 + } else if lda < math.max(1, n) { + info = -4 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dgebal(job, n, a, lda, scale) + return info +} diff --git a/lapack/lapack64/dgeev.v b/lapack/lapack64/dgeev.v new file mode 100644 index 000000000..38799070c --- /dev/null +++ b/lapack/lapack64/dgeev.v @@ -0,0 +1,39 @@ +module lapack64 + +import math +import vsl.blas + +// dgeev computes the eigenvalues and, optionally, the left and/or right eigenvectors for a real nonsymmetric matrix A. +pub fn dgeev(jobvl LeftEVJob, jobvr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl int, mut vr []f64, ldvr int) int { + if n == 0 { + return 0 + } + + mut info := 0 + if jobvl != .left_ev_none && jobvl != .left_ev_compute { + info = -1 + } else if jobvr != .left_ev_none && jobvr != .left_ev_compute { + info = -2 + } else if n < 0 { + info = -3 + } else if lda < math.max(1, n) { + info = -5 + } else if ldvl < 1 || (jobvl == .left_ev_compute && ldvl < n) { + info = -8 + } else if ldvr < 1 || (jobvr == .left_ev_compute && ldvr < n) { + info = -10 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dgehrd(n, ilo, ihi, a, lda, tau, work, lwork) + return info +} diff --git a/lapack/lapack64/dgehrd.v b/lapack/lapack64/dgehrd.v new file mode 100644 index 000000000..2823c0c4d --- /dev/null +++ b/lapack/lapack64/dgehrd.v @@ -0,0 +1,35 @@ +module lapack64 + +import math +import vsl.blas + +// dgehrd reduces a general real matrix A to upper Hessenberg form H by 
an orthogonal similarity transformation. +pub fn dgehrd(n int, ilo int, ihi int, mut a []f64, lda int, tau []f64) int { + if n == 0 { + return 0 + } + + mut info := 0 + if n < 0 { + info = -1 + } else if ilo < 1 || ilo > math.max(1, n) { + info = -2 + } else if ihi < math.min(ilo, n) || ihi > n { + info = -3 + } else if lda < math.max(1, n) { + info = -5 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dgehrd(n, ilo, ihi, a, lda, tau, work, lwork) + return info +} diff --git a/lapack/lapack64/dgesv.v b/lapack/lapack64/dgesv.v index d0d7f90a5..94f948503 100644 --- a/lapack/lapack64/dgesv.v +++ b/lapack/lapack64/dgesv.v @@ -22,7 +22,7 @@ import vsl.blas // The factored form of A is then used to solve the system of equations A * X = // B. On entry, b contains the right hand side matrix B. On return, if ok is // true, b contains the solution matrix X. -pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { +pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, ldb int) { if n < 0 { panic(n_lt0) } @@ -51,6 +51,6 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb panic(short_b) } - dgetrf(n, n, mut a, lda, ipiv) + dgetrf(n, n, mut a, lda, mut ipiv) dgetrs(.no_trans, n, nrhs, mut a, lda, ipiv, mut b, ldb) } diff --git a/lapack/lapack64/dgesvd.v b/lapack/lapack64/dgesvd.v new file mode 100644 index 000000000..a8f035e7d --- /dev/null +++ b/lapack/lapack64/dgesvd.v @@ -0,0 +1,42 @@ +module lapack64 + +import math +import vsl.blas + +// dgesvd computes the singular value decomposition (SVD) of a real matrix A. 
+pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s []f64, mut u []f64, ldu int, mut vt []f64, ldvt int, superb []f64) int { + if m == 0 || n == 0 { + return 0 + } + + mut info := 0 + if jobu != .svd_all && jobu != .svd_store && jobu != .svd_overwrite && jobu != .svd_none { + info = -1 + } else if jobvt != .svd_all && jobvt != .svd_store && jobvt != .svd_overwrite + && jobvt != .svd_none { + info = -2 + } else if m < 0 { + info = -3 + } else if n < 0 { + info = -4 + } else if lda < math.max(1, m) { + info = -6 + } else if ldu < 1 || (jobu == .svd_store && ldu < m) || (jobu == .svd_all && ldu < m) { + info = -9 + } else if ldvt < 1 || (jobvt == .svd_store && ldvt < n) || (jobvt == .svd_all && ldvt < n) { + info = -11 + } + + if info != 0 { + return info + } + + // Quick return if possible + if m == 0 || n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork) + return info +} diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index e1b6553fc..878c7b5b1 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -24,7 +24,7 @@ import vsl.blas // Dgetrf returns whether the matrix A is nonsingular. The LU decomposition will // be computed regardless of the singularity of A, but the result should not be // used to solve a system of equation. -pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { +pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { mn := math.min(m, n) if m < 0 { @@ -34,7 +34,7 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { panic(n_lt0) } if lda < math.max(1, n) { - panic(bad_lda) + panic(bad_ld_a) } // quick return if possible @@ -53,7 +53,8 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { if nb <= 1 || nb >= mn { // use the unblocked algorithm. 
- return dgetf2(m, n, mut a, lda, ipiv) + dgetf2(m, n, mut a, lda, ipiv) + return } for j := 0; j < mn; j += nb { @@ -75,12 +76,12 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { dlaswp(j, mut slice1, lda, j, j + jb, ipiv[..j + jb], 1) mut slice2 := unsafe { a[j * lda + j + jb..] } - blas.dtstrf(.left, .lower, .notrans, .unit, jb, n - j - jb, 1, a[j * lda + j..], + blas.dtstrf(.left, false, false, .unit, jb, n - j - jb, 1, a[j * lda + j..], lda, mut slice2, lda) if j + jb < m { mut slice3 := unsafe { a[(j + jb) * lda + j + jb..] } - blas.dgemm(.notrans, .notrans, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], + blas.dgemm(false, false, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], lda, a[j * lda + j + jb..], lda, 1, mut slice3, lda) } } diff --git a/lapack/lapack64/dgetri.v b/lapack/lapack64/dgetri.v new file mode 100644 index 000000000..8cd14300e --- /dev/null +++ b/lapack/lapack64/dgetri.v @@ -0,0 +1,31 @@ +module lapack64 + +import math +import vsl.blas + +// dgetri computes the inverse of a matrix using the LU factorization computed by dgetrf. +pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) int { + if n == 0 { + return 0 + } + + mut info := 0 + if n < 0 { + info = -1 + } else if lda < math.max(1, n) { + info = -3 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dgetri(n, a, lda, ipiv, work, lwork) + return info +} diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v index 0458cc1e2..bbbc586b0 100644 --- a/lapack/lapack64/dgetrs.v +++ b/lapack/lapack64/dgetrs.v @@ -52,16 +52,15 @@ pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv // Solve A * X = B. dlaswp(nrhs, b, ldb, 0, n - 1, ipiv, 1) // Solve L * X = B, overwriting B with X. 
- blas.dtrsm(.left, .lower, .no_trans, .unit, n, nrhs, 1, mut a, lda, mut b, ldb) + blas.dtrsm(.left, false, false, .unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve U * X = B, overwriting B with X. - blas.dtrsm(.left, .upper, .no_trans, .non_unit, n, nrhs, 1, mut a, lda, mut b, - ldb) + blas.dtrsm(.left, true, false, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) } // Solve Aᵀ * X = B. // Solve Uᵀ * X = B, overwriting B with X. - blas.dtrsm(.left, .upper, .trans, .non_unit, n, nrhs, 1, mut a, lda, mut b, ldb) + blas.dtrsm(.left, true, true, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve Lᵀ * X = B, overwriting B with X. - blas.dtrsm(.left, .lower, .trans, .unit, n, nrhs, 1, mut a, lda, mut b, ldb) + blas.dtrsm(.left, false, true, .unit, n, nrhs, 1, a, lda, mut b, ldb) dlaswp(nrhs, b, ldb, 0, n - 1, ipiv, -1) } diff --git a/lapack/lapack64/dpotrf.v b/lapack/lapack64/dpotrf.v new file mode 100644 index 000000000..b4d307809 --- /dev/null +++ b/lapack/lapack64/dpotrf.v @@ -0,0 +1,33 @@ +module lapack64 + +import math +import vsl.blas + +// dpotrf computes the Cholesky factorization of a real symmetric positive definite matrix A. +pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) int { + if n == 0 { + return 0 + } + + mut info := 0 + if uplo != .upper && uplo != .lower { + info = -1 + } else if n < 0 { + info = -2 + } else if lda < math.max(1, n) { + info = -4 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dpotrf(uplo, n, a, lda, work, lwork) + return info +} diff --git a/lapack/lapack64/dsyev.v b/lapack/lapack64/dsyev.v new file mode 100644 index 000000000..a48998d2a --- /dev/null +++ b/lapack/lapack64/dsyev.v @@ -0,0 +1,38 @@ +module lapack64 + +import math +import vsl.blas + +// dsyev computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. 
+pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, w []f64) int { + if n == 0 { + return 0 + } + + mut info := 0 + if jobz != .ev_none && jobz != .ev_compute { + info = -1 + } else if uplo != .upper && uplo != .lower { + info = -2 + } else if n < 0 { + info = -3 + } else if lda < math.max(1, n) { + info = -5 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Call the relevant LAPACK functions + // (Here we would call the internal implementations like dsytrd, dorgtr, dormtr, etc.) + + // Placeholder for the actual LAPACK function calls + // Example: info = dsytrd(uplo, n, a, lda, w, work, lwork) + return info +} diff --git a/lapack/lapack64/ilaenv.v b/lapack/lapack64/ilaenv.v index 52d2e4e7d..08151271b 100644 --- a/lapack/lapack64/ilaenv.v +++ b/lapack/lapack64/ilaenv.v @@ -1,5 +1,7 @@ module lapack64 +import math + // ilaenv returns algorithm tuning parameters for the algorithm given by the // input string. ispec specifies the parameter to return: // @@ -242,7 +244,7 @@ fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) i // Used by xGELSS and xGESVD // Assuming n1 and n2 are defined elsewhere in your code // Replace `min(n1, n2)` with actual min calculation or function - return int(f64(min(n1, n2)) * 1.6) + return int(f64(math.min(n1, n2)) * 1.6) } 7 { // Not used diff --git a/lapack/lapack_d_vsl_lapack_common.v b/lapack/lapack_d_vsl_lapack_lapacke.v similarity index 78% rename from lapack/lapack_d_vsl_lapack_common.v rename to lapack/lapack_d_vsl_lapack_lapacke.v index 0c4064c2b..cbc7d9839 100644 --- a/lapack/lapack_d_vsl_lapack_common.v +++ b/lapack/lapack_d_vsl_lapack_lapacke.v @@ -5,19 +5,19 @@ import vsl.blas fn C.LAPACKE_dgesv(matrix_layout blas.MemoryLayout, n int, nrhs int, a &f64, lda int, ipiv &int, b &f64, ldb int) int -fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu 
int, vt &f64, ldvt int, superb &f64) int +fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu SVDJob, jobvt SVDJob, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int fn C.LAPACKE_dgetrf(matrix_layout blas.MemoryLayout, m int, n int, a &f64, lda int, ipiv &int) int fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipiv &int) int -fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, up u32, n int, a &f64, lda int) int +fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, uplo blas.Uplo, n int, a &f64, lda int) int -fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int +fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl LeftEVJob, calc_vr LeftEVJob, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int -fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int +fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz EVJob, uplo blas.Uplo, n int, a &f64, lda int, w &f64, work &f64, lwork int) int -fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int +fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job BalanceJob, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int fn C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int @@ -74,9 +74,9 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb // Note that the routine returns V**T, not V. 
// // NOTE: matrix 'a' will be modified -pub fn dgesvd(jobu &char, jobvt &char, m int, n int, a []f64, lda int, s []f64, u []f64, ldu int, vt []f64, ldvt int, superb []f64) { - info := C.LAPACKE_dgesvd(.row_major, jobu, jobvt, m, n, &a[0], lda, &s[0], &u[0], - ldu, &vt[0], ldvt, &superb[0]) +pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s []f64, mut u []f64, ldu int, mut vt []f64, ldvt int, superb []f64) { + info := C.LAPACKE_dgesvd(.row_major, jobu, jobvt, m, n, unsafe { &a[0] }, lda, &s[0], + unsafe { &u[0] }, ldu, unsafe { &vt[0] }, ldvt, &superb[0]) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } @@ -141,9 +141,9 @@ pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { // where U is an upper triangular matrix and L is lower triangular. // // This is the block version of the algorithm, calling Level 3 BLAS. -pub fn dpotrf(up bool, n int, mut a []f64, lda int) { +pub fn dpotrf(uplo bool, n int, mut a []f64, lda int) { unsafe { - info := C.LAPACKE_dpotrf(.row_major, blas.l_uplo(up), n, &a[0], lda) + info := C.LAPACKE_dpotrf(.row_major, blas.c_uplo(uplo), n, &a[0], lda) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } @@ -173,24 +173,24 @@ pub fn dpotrf(up bool, n int, mut a []f64, lda int) { // // The computed eigenvectors are normalized to have Euclidean norm // equal to 1 and largest component real. 
-pub fn dgeev(calc_vl bool, calc_vr bool, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { +pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { mut vvl := 0.0 mut vvr := 0.0 mut ldvl := ldvl_ mut ldvr := ldvr_ - if calc_vl { + if calc_vl == .left_ev_compute { vvl = vl[0] } else { ldvl = 1 } - if calc_vr { + if calc_vr == .left_ev_compute { vvr = vr[0] } else { ldvr = 1 } unsafe { - info := C.LAPACKE_dgeev(.row_major, &char(blas.job_vlr(calc_vl).str().str), &char(blas.job_vlr(calc_vr).str().str), - n, &a[0], lda, &wr[0], &wi[0], &vvl, ldvl, &vvr, ldvr) + info := C.LAPACKE_dgeev(.row_major, calc_vl, calc_vr, n, &a[0], lda, &wr[0], &wi[0], + &vvl, ldvl, &vvr, ldvr) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } diff --git a/lapack/lapack_notd_vsl_lapack_common.v b/lapack/lapack_notd_vsl_lapack_lapacke.v similarity index 70% rename from lapack/lapack_notd_vsl_lapack_common.v rename to lapack/lapack_notd_vsl_lapack_lapacke.v index 3635ed6cd..39d2cbdd6 100644 --- a/lapack/lapack_notd_vsl_lapack_common.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -4,20 +4,6 @@ import vsl.errors import vsl.blas import vsl.lapack.lapack64 -fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int - -fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipiv &int) int - -fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, up u32, n int, a &f64, lda int) int - -fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int - -fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int - -fn C.LAPACKE_dgebal(matrix_layout 
blas.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int - -fn C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int - // dgesv computes the solution to a real system of linear equations. // // See: http://www.netlib.org/lapack/explore-html/d8/d72/dgesv_8f.html @@ -65,9 +51,9 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb // Note that the routine returns V**T, not V. // // NOTE: matrix 'a' will be modified -pub fn dgesvd(jobu &char, jobvt &char, m int, n int, a []f64, lda int, s []f64, u []f64, ldu int, vt []f64, ldvt int, superb []f64) { - info := C.LAPACKE_dgesvd(.row_major, jobu, jobvt, m, n, &a[0], lda, &s[0], &u[0], - ldu, &vt[0], ldvt, &superb[0]) +pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s []f64, mut u []f64, ldu int, mut vt []f64, ldvt int, superb []f64) { + info := lapack64.dgesvd(jobu, jobvt, m, n, mut a, lda, s, mut u, ldu, mut vt, ldvt, + superb) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } @@ -102,11 +88,9 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { // This method inverts U and then computes inv(A) by solving the system // inv(A)*L = inv(U) for inv(A). pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { - unsafe { - info := C.LAPACKE_dgetri(.row_major, n, &a[0], lda, &ipiv[0]) - if info != 0 { - errors.vsl_panic('lapack failed', .efailed) - } + info := lapack64.dgetri(n, mut a, lda, ipiv) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) } } @@ -128,11 +112,9 @@ pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { // // This is the block version of the algorithm, calling Level 3 BLAS. 
pub fn dpotrf(up bool, n int, mut a []f64, lda int) { - unsafe { - info := C.LAPACKE_dpotrf(.row_major, blas.l_uplo(up), n, &a[0], lda) - if info != 0 { - errors.vsl_panic('lapack failed', .efailed) - } + info := lapack64.dpotrf(blas.c_uplo(up), n, mut a, lda) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) } } @@ -159,26 +141,28 @@ pub fn dpotrf(up bool, n int, mut a []f64, lda int) { // // The computed eigenvectors are normalized to have Euclidean norm // equal to 1 and largest component real. -pub fn dgeev(calc_vl bool, calc_vr bool, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { +pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl_ int, mut vr []f64, ldvr_ int) { mut vvl := 0.0 mut vvr := 0.0 mut ldvl := ldvl_ mut ldvr := ldvr_ - if calc_vl { + if calc_vl == .left_ev_compute { vvl = vl[0] } else { ldvl = 1 } - if calc_vr { + if calc_vr == .left_ev_compute { vvr = vr[0] } else { ldvr = 1 } - unsafe { - info := C.LAPACKE_dgeev(.row_major, &char(blas.job_vlr(calc_vl).str().str), &char(blas.job_vlr(calc_vr).str().str), - n, &a[0], lda, &wr[0], &wi[0], &vvl, ldvl, &vvr, ldvr) - if info != 0 { - errors.vsl_panic('lapack failed', .efailed) - } + + vl[0] = vvl + vr[0] = vvr + + info := lapack64.dgeev(calc_vl, calc_vr, n, mut a, lda, wr, wi, mut vl, ldvl, mut + vr, ldvr) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) } } From bfa69075e9101c1f9408261853728812697004e1 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 01:44:33 -0300 Subject: [PATCH 16/33] refactor: Replace constant lookup with a list of constants in ilaenv.v --- lapack/lapack64/ilaenv.v | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack/lapack64/ilaenv.v b/lapack/lapack64/ilaenv.v index 08151271b..f9661e1c2 100644 --- a/lapack/lapack64/ilaenv.v +++ b/lapack/lapack64/ilaenv.v @@ -22,7 +22,7 @@ import math // // ilaenv is an 
internal routine. It is exported for testing purposes. fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) int { - // TODO(btracey): Replace this with a constant lookup? A list of constants? + // TODO(ulises-jeremias): Replace this with a constant lookup? A list of constants? sname := name[0] == `S` || name[0] == `D` cname := name[0] == `C` || name[0] == `Z` if !sname && !cname { From 961475ec4d03f84667151e6a9b73c01400690501 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:09:56 -0300 Subject: [PATCH 17/33] refactor: Update create_image_2d function to use local variable for format --- la/densesol.v | 4 +- la/matrix_ops.v | 10 +-- lapack/lapack64/dgesv.v | 2 +- lapack/lapack64/dgetf2.v | 66 ++++++++++++++++++++ lapack/lapack64/dgetrf.v | 23 ++++--- lapack/lapack64/dgetri.v | 4 +- lapack/lapack64/dgetrs.v | 6 +- lapack/lapack64/dlaswp.v | 44 +++++++++++++ lapack/lapack64/iparmq.v | 83 +++++++++++++++++++++++++ lapack/lapack_d_vsl_lapack_lapacke.v | 8 +-- lapack/lapack_notd_vsl_lapack_lapacke.v | 12 ++-- 11 files changed, 229 insertions(+), 33 deletions(-) create mode 100644 lapack/lapack64/dgetf2.v create mode 100644 lapack/lapack64/dlaswp.v create mode 100644 lapack/lapack64/iparmq.v diff --git a/la/densesol.v b/la/densesol.v index 10395a902..3deed74da 100644 --- a/la/densesol.v +++ b/la/densesol.v @@ -15,6 +15,6 @@ pub fn den_solve(mut x []f64, a &Matrix[f64], b []f64, preserve_a bool) { for i in 0 .. 
x.len { x[i] = b[i] } - ipiv := []int{len: a_.m} - lapack.dgesv(a_.m, 1, mut a_.data, a_.m, ipiv, mut x, 1) + mut ipiv := []int{len: a_.m} + lapack.dgesv(a_.m, 1, mut a_.data, a_.m, mut ipiv, mut x, 1) } diff --git a/la/matrix_ops.v b/la/matrix_ops.v index a792aeede..cbd6cb515 100644 --- a/la/matrix_ops.v +++ b/la/matrix_ops.v @@ -12,8 +12,8 @@ pub fn matrix_det(o &Matrix[f64]) f64 { .efailed) } mut ai := o.data.clone() - ipiv := []int{len: int(math.min(o.m, o.n))} - lapack.dgetrf(o.m, o.n, mut ai, o.m, ipiv) // NOTE: ipiv are 1-based indices + mut ipiv := []int{len: int(math.min(o.m, o.n))} + lapack.dgetrf(o.m, o.n, mut ai, o.m, mut ipiv) // NOTE: ipiv are 1-based indices mut det := 1.0 for i in 0 .. o.m { if ipiv[i] - 1 == i { // NOTE: ipiv are 1-based indices @@ -107,8 +107,8 @@ pub fn matrix_inv(mut ai Matrix[f64], mut a Matrix[f64], calc_det bool) f64 { // square inverse if a.m == a.n { ai.data = a.data.clone() - ipiv := []int{len: int(math.min(a.m, a.n))} - lapack.dgetrf(a.m, a.n, mut ai.data, a.m, ipiv) // NOTE: ipiv are 1-based indices + mut ipiv := []int{len: int(math.min(a.m, a.n))} + lapack.dgetrf(a.m, a.n, mut ai.data, a.m, mut ipiv) // NOTE: ipiv are 1-based indices if calc_det { det = 1.0 for i := 0; i < a.m; i++ { @@ -119,7 +119,7 @@ pub fn matrix_inv(mut ai Matrix[f64], mut a Matrix[f64], calc_det bool) f64 { } } } - lapack.dgetri(a.n, mut ai.data, a.m, ipiv) + lapack.dgetri(a.n, mut ai.data, a.m, mut ipiv) return det } // singular value decomposition diff --git a/lapack/lapack64/dgesv.v b/lapack/lapack64/dgesv.v index 94f948503..3c6834d5e 100644 --- a/lapack/lapack64/dgesv.v +++ b/lapack/lapack64/dgesv.v @@ -52,5 +52,5 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, } dgetrf(n, n, mut a, lda, mut ipiv) - dgetrs(.no_trans, n, nrhs, mut a, lda, ipiv, mut b, ldb) + dgetrs(.no_trans, n, nrhs, mut a, lda, mut ipiv, mut b, ldb) } diff --git a/lapack/lapack64/dgetf2.v b/lapack/lapack64/dgetf2.v new file mode 100644 
index 000000000..2748c78d7 --- /dev/null +++ b/lapack/lapack64/dgetf2.v @@ -0,0 +1,66 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dgetf2(m int, n int, mut a []f64, lda int, mut ipiv []int) { + mn := math.min(m, n) + if m < 0 { + panic(m_lt0) + } else if n < 0 { + panic(n_lt0) + } else if lda < math.max(1, n) { + panic(bad_ld_a) + } + + // Quick return if possible. + if mn == 0 { + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } else if ipiv.len != mn { + panic(bad_len_ipiv) + } + + sfmin := dlamch_s() + + for j := 0; j < mn; j++ { + // Find a pivot and test for singularity. + jp := j + blas.idamax(m - j, a[j * lda + j..], lda) + ipiv[j] = jp + if a[jp * lda + j] == 0.0 { + panic('lapack: matrix is singular') + } else { + // Swap the rows if necessary. + if jp != j { + mut slice1 := unsafe { a[j * lda..] } + mut slice2 := unsafe { a[jp * lda..] } + blas.dswap(n, mut slice1, 1, mut slice2, 1) + } + if j < m - 1 { + aj := a[j * lda + j] + if math.abs(aj) >= sfmin { + mut slice3 := unsafe { a[(j + 1) * lda + j..] } + blas.dscal(m - j - 1, 1.0 / aj, mut slice3, lda) + } else { + for i := 0; i < m - j - 1; i++ { + a[(j + 1) * lda + j] /= aj + } + } + } + } + if j < mn - 1 { + mut slice4 := unsafe { a[(j + 1) * lda + j + 1..] } + blas.dger(m - j - 1, n - j - 1, -1.0, a[(j + 1) * lda + j..], lda, a[j * lda + j + 1..], + 1, mut slice4, lda) + } + } +} + +fn dlamch_s() f64 { + // Returns the safe minimum value (sfmin). + // This value is used as a threshold for detecting small values in the matrix. + return math.ldexp(1.0, -1022) // Smallest positive normal number. +} diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 878c7b5b1..5a1dc8d29 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -53,7 +53,7 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { if nb <= 1 || nb >= mn { // use the unblocked algorithm. 
- dgetf2(m, n, mut a, lda, ipiv) + dgetf2(m, n, mut a, lda, mut ipiv) return } @@ -61,28 +61,31 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { jb := math.min(mn - j, nb) // factor diagonal and subdiagonal blocks and test for exact singularity. - dgetf2(m - j, jb, mut a[j * lda + j..], lda, ipiv[j..j + jb]) + mut slice1 := unsafe { ipiv[j..j + jb] } + dgetf2(m - j, jb, mut a[j * lda + j..], lda, mut slice1) for i := j; i <= math.min(m - 1, j + jb - 1); i++ { ipiv[i] += j } // apply interchanges to columns 1..j-1. - dlaswp(j, mut a, lda, j, j + jb - 1, ipiv[..j + jb], 1) + mut slice_ipiv1 := unsafe { ipiv[..j + jb] } + dlaswp(j, mut a, lda, j, j + jb - 1, mut slice_ipiv1, 1) if j + jb < n { // apply interchanges to columns 1..j-1. - mut slice1 := unsafe { a[j + jb..] } - dlaswp(j, mut slice1, lda, j, j + jb, ipiv[..j + jb], 1) + mut slice2 := unsafe { a[j + jb..] } + mut slice_ipiv2 := unsafe { ipiv[..j + jb] } + dlaswp(j, mut slice2, lda, j, j + jb, mut slice_ipiv2, 1) - mut slice2 := unsafe { a[j * lda + j + jb..] } - blas.dtstrf(.left, false, false, .unit, jb, n - j - jb, 1, a[j * lda + j..], - lda, mut slice2, lda) + mut slice3 := unsafe { a[j * lda + j + jb..] } + blas.dtrsm(.left, false, false, .unit, jb, n - j - jb, 1, a[j * lda + j..], + lda, mut slice3, lda) if j + jb < m { - mut slice3 := unsafe { a[(j + jb) * lda + j + jb..] } + mut slice4 := unsafe { a[(j + jb) * lda + j + jb..] } blas.dgemm(false, false, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], - lda, a[j * lda + j + jb..], lda, 1, mut slice3, lda) + lda, a[j * lda + j + jb..], lda, 1, mut slice4, lda) } } } diff --git a/lapack/lapack64/dgetri.v b/lapack/lapack64/dgetri.v index 8cd14300e..0b466ad6a 100644 --- a/lapack/lapack64/dgetri.v +++ b/lapack/lapack64/dgetri.v @@ -4,7 +4,7 @@ import math import vsl.blas // dgetri computes the inverse of a matrix using the LU factorization computed by dgetrf. 
-pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) int { +pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) int { if n == 0 { return 0 } @@ -26,6 +26,6 @@ pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) int { } // Placeholder for the actual LAPACK function calls - // Example: info = dgetri(n, a, lda, ipiv, work, lwork) + // Example: info = dgetri(n, a, lda, mut ipiv, work, lwork) return info } diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v index bbbc586b0..9e1424600 100644 --- a/lapack/lapack64/dgetrs.v +++ b/lapack/lapack64/dgetrs.v @@ -16,7 +16,7 @@ import vsl.blas // // a and ipiv contain the LU factorization of A and the permutation indices as // computed by Dgetrf. ipiv is zero-indexed. -pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { +pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, ldb int) { if trans != .no_trans && trans != .trans && trans != .conj_trans { panic(bad_trans) } @@ -50,7 +50,7 @@ pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv if trans != .no_trans { // Solve A * X = B. - dlaswp(nrhs, b, ldb, 0, n - 1, ipiv, 1) + dlaswp(nrhs, mut b, ldb, 0, n - 1, mut ipiv, 1) // Solve L * X = B, overwriting B with X. blas.dtrsm(.left, false, false, .unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve U * X = B, overwriting B with X. @@ -62,5 +62,5 @@ pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv blas.dtrsm(.left, true, true, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve Lᵀ * X = B, overwriting B with X. 
blas.dtrsm(.left, false, true, .unit, n, nrhs, 1, a, lda, mut b, ldb) - dlaswp(nrhs, b, ldb, 0, n - 1, ipiv, -1) + dlaswp(nrhs, mut b, ldb, 0, n - 1, mut ipiv, -1) } diff --git a/lapack/lapack64/dlaswp.v b/lapack/lapack64/dlaswp.v new file mode 100644 index 000000000..5f6a53c8d --- /dev/null +++ b/lapack/lapack64/dlaswp.v @@ -0,0 +1,44 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dlaswp(n int, mut a []f64, lda int, k1 int, k2 int, mut ipiv []int, incx int) { + if n < 0 { + panic(n_lt0) + } else if k1 < 0 { + panic(bad_k1) + } else if k2 < k1 { + panic(bad_k2) + } else if lda < math.max(1, n) { + panic(bad_ld_a) + } else if a.len < k2 * lda + n { + // A must have at least k2+1 rows. + panic(short_a) + } else if ipiv.len != k2 + 1 { + panic(bad_len_ipiv) + } else if incx != 1 && incx != -1 { + panic(abs_inc_not_one) + } + + if n == 0 { + return + } + + if incx == 1 { + for k := k1; k <= k2; k++ { + if k == ipiv[k] { + continue + } + blas.dswap(n, mut a[k * lda..], 1, mut a[ipiv[k] * lda..], 1) + } + return + } + + for k := k2; k >= k1; k-- { + if k == ipiv[k] { + continue + } + blas.dswap(n, mut a[k * lda..], 1, mut a[ipiv[k] * lda..], 1) + } +} diff --git a/lapack/lapack64/iparmq.v b/lapack/lapack64/iparmq.v new file mode 100644 index 000000000..5917d7109 --- /dev/null +++ b/lapack/lapack64/iparmq.v @@ -0,0 +1,83 @@ +module lapack64 + +import math + +fn iparmq(ispec int, name string, opts string, n int, ilo int, ihi int, lwork int) int { + nh := ihi - ilo + 1 + mut ns := 2 + if nh >= 30 { + ns = 4 + } else if nh >= 60 { + ns = 10 + } else if nh >= 150 { + ns = math.max(10, nh / int(math.log(nh) / math.ln2)) + } else if nh >= 590 { + ns = 64 + } else if nh >= 3000 { + ns = 128 + } else if nh >= 6000 { + ns = 256 + } + ns = math.max(2, ns - (ns % 2)) + + match ispec { + 12 { + // Matrices of order smaller than nmin get sent to Dlahqr, the + // classic double shift algorithm. This must be at least 11. 
+ nmin := 75 + return nmin + } + 13 { + knwswp := 500 + if nh <= knwswp { + return ns + } + return 3 * ns / 2 + } + 14 { + // Skip a computationally expensive multi-shift QR sweep with + // Dlaqr5 whenever aggressive early deflation finds at least + // nibble*(window size)/100 deflations. The default, small, + // value reflects the expectation that the cost of looking + // through the deflation window with Dlaqr3 will be + // substantially smaller. + nibble := 14 + return nibble + } + 15 { + return ns + } + 16 { + if name.len != 6 { + panic('bad name length') + } + k22min := 14 + kacmin := 14 + mut acc22 := 0 + if name[1..].starts_with('GGHRD') || name[1..].starts_with('GGHD3') { + acc22 = 1 + if nh >= k22min { + acc22 = 2 + } + } else if name[3..].starts_with('EXC') { + if nh >= kacmin { + acc22 = 1 + } + if nh >= k22min { + acc22 = 2 + } + } else if name[1..].starts_with('HSEQR') || name[1..5].starts_with('LAQR') { + if ns >= kacmin { + acc22 = 1 + } + if ns >= k22min { + acc22 = 2 + } + } + return acc22 + } + else { + panic('bad ispec') + } + } +} diff --git a/lapack/lapack_d_vsl_lapack_lapacke.v b/lapack/lapack_d_vsl_lapack_lapacke.v index cbc7d9839..9bc8314d9 100644 --- a/lapack/lapack_d_vsl_lapack_lapacke.v +++ b/lapack/lapack_d_vsl_lapack_lapacke.v @@ -43,11 +43,11 @@ fn C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a // system of equations A * X = B. // // NOTE: matrix 'a' will be modified -pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { +pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, ldb int) { if ipiv.len != n { errors.vsl_panic('ipiv.len must be equal to n. 
${ipiv.len} != ${n}\n', .efailed) } - info := C.LAPACKE_dgesv(.row_major, n, nrhs, unsafe { &a[0] }, lda, &ipiv[0], unsafe { &b[0] }, + info := C.LAPACKE_dgesv(.row_major, n, nrhs, unsafe { &a[0] }, lda, unsafe { &ipiv[0] }, unsafe { &b[0] }, ldb) if info != 0 { errors.vsl_panic('lapack failed', .efailed) @@ -98,7 +98,7 @@ pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s [ // // NOTE: (1) matrix 'a' will be modified // (2) ipiv indices are 1-based (i.e. Fortran) -pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { +pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { unsafe { info := C.LAPACKE_dgetrf(.row_major, m, n, &a[0], lda, &ipiv[0]) if info != 0 { @@ -115,7 +115,7 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { // // This method inverts U and then computes inv(A) by solving the system // inv(A)*L = inv(U) for inv(A). -pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { +pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { unsafe { info := C.LAPACKE_dgetri(.row_major, n, &a[0], lda, &ipiv[0]) if info != 0 { diff --git a/lapack/lapack_notd_vsl_lapack_lapacke.v b/lapack/lapack_notd_vsl_lapack_lapacke.v index 39d2cbdd6..2730f4ef6 100644 --- a/lapack/lapack_notd_vsl_lapack_lapacke.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -27,8 +27,8 @@ import vsl.lapack.lapack64 // // NOTE: matrix 'a' will be modified @[inline] -pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { - lapack64.dgesv(n, nrhs, mut a, lda, ipiv, mut b, ldb) +pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, ldb int) { + lapack64.dgesv(n, nrhs, mut a, lda, mut ipiv, mut b, ldb) } // dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. 
@@ -75,8 +75,8 @@ pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s [ // // NOTE: (1) matrix 'a' will be modified // (2) ipiv indices are 1-based (i.e. Fortran) -pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { - lapack64.dgetrf(m, n, mut a, lda, ipiv) +pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { + lapack64.dgetrf(m, n, mut a, lda, mut ipiv) } // dgetri computes the inverse of a matrix using the LU factorization computed by DGETRF. @@ -87,8 +87,8 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { // // This method inverts U and then computes inv(A) by solving the system // inv(A)*L = inv(U) for inv(A). -pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { - info := lapack64.dgetri(n, mut a, lda, ipiv) +pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { + info := lapack64.dgetri(n, mut a, lda, mut ipiv) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } From f6cb78224ad9bf1301e226a3e785651a059a2f37 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:11:33 -0300 Subject: [PATCH 18/33] refactor: Update execute tests step in ci.yml to use Pure C Backend with LAPACKE --- .github/workflows/ci.yml | 8 ++++---- bin/test | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7b943ecb4..9ba2cf481 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,8 +67,8 @@ jobs: - name: Execute Tests using Pure V Backend run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} - - name: Execute Tests using Pure V Backend with Pure C Blas - run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas + - name: Execute Tests using Pure V Backend with Pure C Backend + run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas --use-lapacke run-tests-on-macos: runs-on: ${{ matrix.os }} @@ -113,5 +113,5 @@ jobs: - name: Execute Tests using Pure V Backend run: 
~/.vmodules/vsl/bin/test - - name: Execute Tests using Pure V Backend with Pure C Blas - run: ~/.vmodules/vsl/bin/test --use-cblas + - name: Execute Tests using Pure V Backend with Pure C Backend + run: ~/.vmodules/vsl/bin/test --use-cblas --use-lapacke diff --git a/bin/test b/bin/test index d4e1878ed..1c171926d 100755 --- a/bin/test +++ b/bin/test @@ -10,6 +10,7 @@ ## --stats Execute with stats ## --prod Execute with prod build ## --use-cblas Execute tests using cblas +## --use-lapacke Execute tests using lapacke ## --use-autofree Execute tests using atofree ## --use-gc=STRATEGY Execute tests using garbage collector ## --skip-examples Skip examples compilation @@ -31,6 +32,11 @@ if [[ -n "${use_cblas}" ]]; then flags="${flags} -d vsl_blas_cblas" fi +if [[ -n "${use_lapacke}" ]]; then + echo "Running tests using LAPACKE" + flags="${flags} -d vsl_lapack_lapacke" +fi + if [[ -n "${use_autofree}" ]]; then echo "Running tests using V Math" flags="${flags} -autofree" From 569a96b9cca7bc33093350d704f15393460548e0 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:17:47 -0300 Subject: [PATCH 19/33] refactor: Comment out test execution step in ci.yml --- .github/workflows/ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ba2cf481..1cdfa15e6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,8 +64,8 @@ jobs: - name: Move VSL source code to V Modules run: mv ./vsl ~/.vmodules - - name: Execute Tests using Pure V Backend - run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} + # - name: Execute Tests using Pure V Backend + # run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} - name: Execute Tests using Pure V Backend with Pure C Backend run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas --use-lapacke @@ -110,8 +110,8 @@ jobs: - name: Move VSL source code to V Modules run: mv ./vsl ~/.vmodules - - name: Execute Tests using Pure V 
Backend - run: ~/.vmodules/vsl/bin/test + # - name: Execute Tests using Pure V Backend + # run: ~/.vmodules/vsl/bin/test - name: Execute Tests using Pure V Backend with Pure C Backend run: ~/.vmodules/vsl/bin/test --use-cblas --use-lapacke From d8a4fc2ac24528926beb1ced1449094d5a46fa62 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:33:20 -0300 Subject: [PATCH 20/33] refactor: Update create_image_2d function to use local variable for format --- blas/blas64/dgemm.v | 21 +- blas/conversions.v | 20 -- blas/oblas_d_vsl_blas_cblas.v | 428 +++++++++++++-------------- blas/oblas_notd_vsl_blas_cblas.v | 25 +- float/float64/gemv_test.v | 10 +- la/blas.v | 34 +-- lapack/lapack64/dgetrf.v | 6 +- lapack/lapack64/dgetrs.v | 8 +- lapack/lapack_d_vsl_lapack_lapacke.v | 8 +- 9 files changed, 261 insertions(+), 299 deletions(-) diff --git a/blas/blas64/dgemm.v b/blas/blas64/dgemm.v index f7bcfeb5e..86ef17acc 100644 --- a/blas/blas64/dgemm.v +++ b/blas/blas64/dgemm.v @@ -99,10 +99,11 @@ pub fn dgemm(trans_a Transpose, trans_b Transpose, m int, n int, k int, alpha f6 } } - dgemm_parallel(a_trans, b_trans, m, n, k, a, lda, b, ldb, mut c, ldc, alpha) + dgemm_parallel(if a_trans { .trans } else { .no_trans }, if b_trans { .trans } else { .no_trans }, + m, n, k, a, lda, b, ldb, mut c, ldc, alpha) } -fn dgemm_parallel(a_trans bool, b_trans bool, m int, n int, k int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64) { +fn dgemm_parallel(a_trans Transpose, b_trans Transpose, m int, n int, k int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64) { // dgemm_parallel computes a parallel matrix multiplication by partitioning // a and b into sub-blocks, and updating c with the multiplication of the sub-block // In all cases, @@ -155,7 +156,7 @@ fn dgemm_parallel(a_trans bool, b_trans bool, m int, n int, k int, a []f64, lda for i := 0; i < m; i += block_size { for j := 0; j < n; j += block_size { // worker_limit <- 0 - go fn (a_trans 
bool, b_trans bool, m int, n int, max_k_len int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64, i int, j int, mut wg sync.WaitGroup) { + go fn (a_trans Transpose, b_trans Transpose, m int, n int, max_k_len int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64, i int, j int, mut wg sync.WaitGroup) { defer { wg.done() // <-worker_limit @@ -180,12 +181,12 @@ fn dgemm_parallel(a_trans bool, b_trans bool, m int, n int, k int, a []f64, lda } mut a_sub := []f64{} mut b_sub := []f64{} - if a_trans { + if a_trans == .trans { a_sub = slice_view_f64(a, lda, k, i, lenk, leni) } else { a_sub = slice_view_f64(a, lda, i, k, leni, lenk) } - if b_trans { + if b_trans == .trans { b_sub = slice_view_f64(b, ldb, j, k, lenj, lenk) } else { b_sub = slice_view_f64(b, ldb, k, j, lenk, lenj) @@ -200,20 +201,20 @@ fn dgemm_parallel(a_trans bool, b_trans bool, m int, n int, k int, a []f64, lda } // dgemm_serial is serial matrix multiply -fn dgemm_serial(a_trans bool, b_trans bool, m int, n int, k int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64) { - if !a_trans && !b_trans { +fn dgemm_serial(a_trans Transpose, b_trans Transpose, m int, n int, k int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64) { + if a_trans != .trans && b_trans != .trans { dgemm_serial_not_not(m, n, k, a, lda, b, ldb, mut c, ldc, alpha) return } - if a_trans && !b_trans { + if a_trans == .trans && b_trans != .trans { dgemm_serial_trans_not(m, n, k, a, lda, b, ldb, mut c, ldc, alpha) return } - if !a_trans && b_trans { + if a_trans != .trans && b_trans == .trans { dgemm_serial_not_trans(m, n, k, a, lda, b, ldb, mut c, ldc, alpha) return } - if a_trans && b_trans { + if a_trans == .trans && b_trans == .trans { dgemm_serial_trans_trans(m, n, k, a, lda, b, ldb, mut c, ldc, alpha) return } diff --git a/blas/conversions.v b/blas/conversions.v index 553bcf16d..b421fa60c 100644 --- a/blas/conversions.v +++ b/blas/conversions.v @@ -21,26 +21,6 
@@ pub type Diagonal = blas64.Diagonal // Side is used to specify whether a matrix is on the left or right side in a matrix-matrix multiplication. pub type Side = blas64.Side -// c_trans is a helper function to convert bool to Transpose -pub fn c_trans(trans bool) Transpose { - return if trans { .trans } else { .no_trans } -} - -// c_uplo is a helper function to convert bool to Uplo -pub fn c_uplo(up bool) Uplo { - return if up { .upper } else { .lower } -} - -// l_uplo is a helper function to convert bool to Uplo -pub fn l_uplo(up bool) u8 { - return if up { `U` } else { `L` } -} - -// job_vlr is a helper function to convert bool to char -pub fn job_vlr(do_calc bool) rune { - return if do_calc { `V` } else { `N` } -} - // slice_to_col_major converts nested slice into an array representing a col-major matrix // // _**NOTE**: make sure to have at least 1x1 item_ diff --git a/blas/oblas_d_vsl_blas_cblas.v b/blas/oblas_d_vsl_blas_cblas.v index 777038f3d..653a6c171 100644 --- a/blas/oblas_d_vsl_blas_cblas.v +++ b/blas/oblas_d_vsl_blas_cblas.v @@ -472,25 +472,25 @@ pub fn zdscal(n int, alpha f64, mut x voidptr, incx int) { } @[inline] -pub fn sgemv(trans bool, m int, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_sgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn sgemv(trans Transpose, m int, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sgemv(.row_major, trans, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn dgemv(trans Transpose, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + 
C.cblas_dgemv(.row_major, trans, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn cgemv(trans bool, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_cgemv(.row_major, c_trans(trans), m, n, alpha, a, lda, x, incx, beta, y, incy) +pub fn cgemv(trans Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_cgemv(.row_major, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn zgemv(trans bool, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_zgemv(.row_major, c_trans(trans), m, n, alpha, a, lda, x, incx, beta, y, incy) +pub fn zgemv(trans Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zgemv(.row_major, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) } @[inline] @@ -526,443 +526,427 @@ pub fn zgerc(m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy i } @[inline] -pub fn strsv(uplo bool, trans bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn strsv(uplo Uplo, trans Transpose, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strsv(.row_major, uplo, trans, diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn dtrsv(uplo bool, trans bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn dtrsv(uplo Uplo, trans Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrsv(.row_major, uplo, trans, diag, n, unsafe { &a[0] 
}, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctrsv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ctrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +pub fn ctrsv(uplo Uplo, trans Transpose, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctrsv(.row_major, uplo, trans, diag, n, a, lda, x, incx) } @[inline] -pub fn ztrsv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ztrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +pub fn ztrsv(uplo Uplo, trans Transpose, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztrsv(.row_major, uplo, trans, diag, n, a, lda, x, incx) } @[inline] -pub fn strmv(uplo bool, trans bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn strmv(uplo Uplo, trans Transpose, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strmv(.row_major, uplo, trans, diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn dtrmv(uplo bool, trans bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn dtrmv(uplo Uplo, trans Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrmv(.row_major, uplo, trans, diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctrmv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ctrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +pub fn ctrmv(uplo Uplo, trans Transpose, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + 
C.cblas_ctrmv(.row_major, uplo, trans, diag, n, a, lda, x, incx) } @[inline] -pub fn ztrmv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ztrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +pub fn ztrmv(uplo Uplo, trans Transpose, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztrmv(.row_major, uplo, trans, diag, n, a, lda, x, incx) } @[inline] -pub fn ssyr(uplo bool, n int, alpha f32, x []f32, incx int, mut a []f32, lda int) { - C.cblas_ssyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, +pub fn ssyr(uplo Uplo, n int, alpha f32, x []f32, incx int, mut a []f32, lda int) { + C.cblas_ssyr(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, lda) } @[inline] -pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { - C.cblas_dsyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, +pub fn dsyr(uplo Uplo, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { + C.cblas_dsyr(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, lda) } @[inline] -pub fn cher(uplo bool, n int, alpha f32, x voidptr, incx int, mut a voidptr, lda int) { - C.cblas_cher(.row_major, c_uplo(uplo), n, alpha, x, incx, a, lda) +pub fn cher(uplo Uplo, n int, alpha f32, x voidptr, incx int, mut a voidptr, lda int) { + C.cblas_cher(.row_major, uplo, n, alpha, x, incx, a, lda) } @[inline] -pub fn zher(uplo bool, n int, alpha f64, x voidptr, incx int, mut a voidptr, lda int) { - C.cblas_zher(.row_major, c_uplo(uplo), n, alpha, x, incx, a, lda) +pub fn zher(uplo Uplo, n int, alpha f64, x voidptr, incx int, mut a voidptr, lda int) { + C.cblas_zher(.row_major, uplo, n, alpha, x, incx, a, lda) } @[inline] -pub fn ssyr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { - C.cblas_ssyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, 
incx, unsafe { &y[0] }, +pub fn ssyr2(uplo Uplo, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { + C.cblas_ssyr2(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &a[0] }, lda) } @[inline] -pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - C.cblas_dsyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, +pub fn dsyr2(uplo Uplo, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { + C.cblas_dsyr2(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &a[0] }, lda) } @[inline] -pub fn cher2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { - C.cblas_cher2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, a, lda) +pub fn cher2(uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_cher2(.row_major, uplo, n, alpha, x, incx, y, incy, a, lda) } @[inline] -pub fn zher2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { - C.cblas_zher2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, a, lda) +pub fn zher2(uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_zher2(.row_major, uplo, n, alpha, x, incx, y, incy, a, lda) } @[inline] -pub fn sgbmv(trans bool, m int, n int, kl int, ku int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_sgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, unsafe { &a[0] }, lda, - unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +pub fn sgbmv(trans Transpose, m int, n int, kl int, ku int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sgbmv(.row_major, trans, m, n, kl, ku, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, 
beta, unsafe { &y[0] }, incy) } @[inline] -pub fn dgbmv(trans bool, m int, n int, kl int, ku int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, unsafe { &a[0] }, lda, - unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +pub fn dgbmv(trans Transpose, m int, n int, kl int, ku int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dgbmv(.row_major, trans, m, n, kl, ku, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn cgbmv(trans bool, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_cgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, - y, incy) +pub fn cgbmv(trans Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_cgbmv(.row_major, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn zgbmv(trans bool, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_zgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, - y, incy) +pub fn zgbmv(trans Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zgbmv(.row_major, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn ssbmv(uplo bool, n int, k int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_ssbmv(.row_major, c_uplo(uplo), n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn ssbmv(uplo Uplo, n int, k int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + 
C.cblas_ssbmv(.row_major, uplo, n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn dsbmv(uplo bool, n int, k int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dsbmv(.row_major, c_uplo(uplo), n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn dsbmv(uplo Uplo, n int, k int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dsbmv(.row_major, uplo, n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn stbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_stbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn stbmv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_stbmv(.row_major, uplo, trans, diag, n, k, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn dtbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn dtbmv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtbmv(.row_major, uplo, trans, diag, n, k, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ctbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +pub fn ctbmv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctbmv(.row_major, uplo, trans, diag, n, k, a, lda, x, incx) } @[inline] -pub fn ztbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, 
lda int, mut x voidptr, incx int) { - C.cblas_ztbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +pub fn ztbmv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztbmv(.row_major, uplo, trans, diag, n, k, a, lda, x, incx) } @[inline] -pub fn stbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_stbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn stbsv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_stbsv(.row_major, uplo, trans, diag, n, k, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn dtbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn dtbsv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtbsv(.row_major, uplo, trans, diag, n, k, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ctbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +pub fn ctbsv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctbsv(.row_major, uplo, trans, diag, n, k, a, lda, x, incx) } @[inline] -pub fn ztbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ztbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +pub fn ztbsv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztbsv(.row_major, uplo, trans, diag, 
n, k, a, lda, x, incx) } @[inline] -pub fn stpmv(uplo bool, trans bool, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { - C.cblas_stpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, - unsafe { &x[0] }, incx) +pub fn stpmv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { + C.cblas_stpmv(.row_major, uplo, trans, diag, n, unsafe { &ap[0] }, unsafe { &x[0] }, + incx) } @[inline] -pub fn dtpmv(uplo bool, trans bool, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { - C.cblas_dtpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, - unsafe { &x[0] }, incx) +pub fn dtpmv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + C.cblas_dtpmv(.row_major, uplo, trans, diag, n, unsafe { &ap[0] }, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctpmv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { - C.cblas_ctpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +pub fn ctpmv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ctpmv(.row_major, uplo, trans, diag, n, ap, x, incx) } @[inline] -pub fn ztpmv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { - C.cblas_ztpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +pub fn ztpmv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ztpmv(.row_major, uplo, trans, diag, n, ap, x, incx) } @[inline] -pub fn stpsv(uplo bool, trans bool, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { - C.cblas_stpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, - unsafe { &x[0] }, incx) +pub fn stpsv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { + C.cblas_stpsv(.row_major, uplo, trans, diag, n, unsafe { &ap[0] }, unsafe { &x[0] }, + incx) } @[inline] -pub fn 
dtpsv(uplo bool, trans bool, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { - C.cblas_dtpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, - unsafe { &x[0] }, incx) +pub fn dtpsv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + C.cblas_dtpsv(.row_major, uplo, trans, diag, n, unsafe { &ap[0] }, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctpsv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { - C.cblas_ctpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +pub fn ctpsv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ctpsv(.row_major, uplo, trans, diag, n, ap, x, incx) } @[inline] -pub fn ztpsv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { - C.cblas_ztpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +pub fn ztpsv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ztpsv(.row_major, uplo, trans, diag, n, ap, x, incx) } @[inline] -pub fn ssymv(uplo bool, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_ssymv(.row_major, c_uplo(uplo), n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn ssymv(uplo Uplo, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_ssymv(.row_major, uplo, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn dsymv(uplo bool, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dsymv(.row_major, c_uplo(uplo), n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn dsymv(uplo Uplo, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dsymv(.row_major, uplo, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe 
{ &y[0] }, incy) } @[inline] -pub fn chemv(uplo bool, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_chemv(.row_major, c_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy) +pub fn chemv(uplo Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chemv(.row_major, uplo, n, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn zhemv(uplo bool, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_zhemv(.row_major, c_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy) +pub fn zhemv(uplo Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhemv(.row_major, uplo, n, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn sspmv(uplo bool, n int, alpha f32, ap []f32, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_sspmv(.row_major, c_uplo(uplo), n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, - incx, beta, unsafe { &y[0] }, incy) +pub fn sspmv(uplo Uplo, n int, alpha f32, ap []f32, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sspmv(.row_major, uplo, n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, incx, + beta, unsafe { &y[0] }, incy) } @[inline] -pub fn dspmv(uplo bool, n int, alpha f64, ap []f64, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dspmv(.row_major, c_uplo(uplo), n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, - incx, beta, unsafe { &y[0] }, incy) +pub fn dspmv(uplo Uplo, n int, alpha f64, ap []f64, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dspmv(.row_major, uplo, n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, incx, + beta, unsafe { &y[0] }, incy) } @[inline] -pub fn sspr(uplo bool, n int, alpha f32, x []f32, incx int, mut ap []f32) { - C.cblas_sspr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { 
&ap[0] }) +pub fn sspr(uplo Uplo, n int, alpha f32, x []f32, incx int, mut ap []f32) { + C.cblas_sspr(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &ap[0] }) } @[inline] -pub fn dspr(uplo bool, n int, alpha f64, x []f64, incx int, mut ap []f64) { - C.cblas_dspr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &ap[0] }) +pub fn dspr(uplo Uplo, n int, alpha f64, x []f64, incx int, mut ap []f64) { + C.cblas_dspr(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &ap[0] }) } @[inline] -pub fn chpr(uplo bool, n int, alpha f32, x voidptr, incx int, mut a voidptr) { - C.cblas_chpr(.row_major, c_uplo(uplo), n, alpha, x, incx, a) +pub fn chpr(uplo Uplo, n int, alpha f32, x voidptr, incx int, mut a voidptr) { + C.cblas_chpr(.row_major, uplo, n, alpha, x, incx, a) } @[inline] -pub fn zhpr(uplo bool, n int, alpha f64, x voidptr, incx int, mut a voidptr) { - C.cblas_zhpr(.row_major, c_uplo(uplo), n, alpha, x, incx, a) +pub fn zhpr(uplo Uplo, n int, alpha f64, x voidptr, incx int, mut a voidptr) { + C.cblas_zhpr(.row_major, uplo, n, alpha, x, incx, a) } @[inline] -pub fn sspr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32) { - C.cblas_sspr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, +pub fn sspr2(uplo Uplo, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32) { + C.cblas_sspr2(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &a[0] }) } @[inline] -pub fn dspr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64) { - C.cblas_dspr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, +pub fn dspr2(uplo Uplo, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64) { + C.cblas_dspr2(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &a[0] }) } @[inline] -pub fn chpr2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y 
voidptr, incy int, mut ap voidptr) { - C.cblas_chpr2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, ap) +pub fn chpr2(uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut ap voidptr) { + C.cblas_chpr2(.row_major, uplo, n, alpha, x, incx, y, incy, ap) } @[inline] -pub fn zhpr2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut ap voidptr) { - C.cblas_zhpr2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, ap) +pub fn zhpr2(uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut ap voidptr) { + C.cblas_zhpr2(.row_major, uplo, n, alpha, x, incx, y, incy, ap) } @[inline] -pub fn chbmv(uplo bool, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_chbmv(.row_major, c_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy) +pub fn chbmv(uplo Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chbmv(.row_major, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn zhbmv(uplo bool, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_zhbmv(.row_major, c_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy) +pub fn zhbmv(uplo Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhbmv(.row_major, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn chpmv(uplo bool, n int, alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_chpmv(.row_major, c_uplo(uplo), n, alpha, ap, x, incx, beta, y, incy) +pub fn chpmv(uplo Uplo, n int, alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chpmv(.row_major, uplo, n, alpha, ap, x, incx, beta, y, incy) } @[inline] -pub fn zhpmv(uplo bool, n int, 
alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_zhpmv(.row_major, c_uplo(uplo), n, alpha, ap, x, incx, beta, y, incy) +pub fn zhpmv(uplo Uplo, n int, alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhpmv(.row_major, uplo, n, alpha, ap, x, incx, beta, y, incy) } @[inline] -pub fn ssyrk(uplo bool, trans bool, n int, k int, alpha f32, a []f32, lda int, beta f32, mut c []f32, ldc int) { - C.cblas_ssyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, - lda, beta, unsafe { &c[0] }, ldc) +pub fn ssyrk(uplo Uplo, trans Transpose, n int, k int, alpha f32, a []f32, lda int, beta f32, mut c []f32, ldc int) { + C.cblas_ssyrk(.row_major, uplo, trans, n, k, alpha, unsafe { &a[0] }, lda, beta, unsafe { &c[0] }, + ldc) } @[inline] -pub fn dsyrk(uplo bool, trans bool, n int, k int, alpha f64, a []f64, lda int, beta f64, mut c []f64, ldc int) { - C.cblas_dsyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, - lda, beta, unsafe { &c[0] }, ldc) +pub fn dsyrk(uplo Uplo, trans Transpose, n int, k int, alpha f64, a []f64, lda int, beta f64, mut c []f64, ldc int) { + C.cblas_dsyrk(.row_major, uplo, trans, n, k, alpha, unsafe { &a[0] }, lda, beta, unsafe { &c[0] }, + ldc) } @[inline] -pub fn csyrk(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { - C.cblas_csyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, - c, ldc) +pub fn csyrk(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_csyrk(.row_major, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) } @[inline] -pub fn zsyrk(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { - C.cblas_zsyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, - c, ldc) +pub fn 
zsyrk(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_zsyrk(.row_major, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) } @[inline] -pub fn ssyr2k(uplo bool, trans bool, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut c []f32, ldc int) { - C.cblas_ssyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &c[0] }, ldc) +pub fn ssyr2k(uplo Uplo, trans Transpose, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut c []f32, ldc int) { + C.cblas_ssyr2k(.row_major, uplo, trans, n, k, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, + ldb, beta, unsafe { &c[0] }, ldc) } @[inline] -pub fn dsyr2k(uplo bool, trans bool, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut c []f64, ldc int) { - C.cblas_dsyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &c[0] }, ldc) +pub fn dsyr2k(uplo Uplo, trans Transpose, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut c []f64, ldc int) { + C.cblas_dsyr2k(.row_major, uplo, trans, n, k, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, + ldb, beta, unsafe { &c[0] }, ldc) } @[inline] -pub fn csyr2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { - C.cblas_csyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, - beta, c, ldc) +pub fn csyr2k(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_csyr2k(.row_major, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] -pub fn zsyr2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { - 
C.cblas_zsyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, - beta, c, ldc) +pub fn zsyr2k(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_zsyr2k(.row_major, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] -pub fn strmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { - C.cblas_strmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, +pub fn strmm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_strmm(.row_major, side, uplo, trans, diag, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, ldb) } @[inline] -pub fn dtrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { - C.cblas_dtrmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, +pub fn dtrmm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_dtrmm(.row_major, side, uplo, trans, diag, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, ldb) } @[inline] -pub fn ctrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { - C.cblas_ctrmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, - lda, b, ldb) +pub fn ctrmm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ctrmm(.row_major, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) } @[inline] -pub fn ztrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { - C.cblas_ztrmm(.row_major, side, c_uplo(uplo), 
c_trans(trans), diag, m, n, alpha, a, - lda, b, ldb) +pub fn ztrmm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ztrmm(.row_major, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) } @[inline] -pub fn strsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { - C.cblas_strsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, +pub fn strsm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_strsm(.row_major, side, uplo, trans, diag, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, ldb) } @[inline] -pub fn dtrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { - C.cblas_dtrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, +pub fn dtrsm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_dtrsm(.row_major, side, uplo, trans, diag, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, ldb) } @[inline] -pub fn ctrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { - C.cblas_ctrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, - lda, b, ldb) +pub fn ctrsm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ctrsm(.row_major, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) } @[inline] -pub fn ztrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { - C.cblas_ztrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, - lda, b, ldb) +pub fn 
ztrsm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ztrsm(.row_major, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) } @[inline] -pub fn chemm(side Side, uplo bool, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { - C.cblas_chemm(.row_major, side, c_uplo(uplo), m, n, alpha, a, lda, b, ldb, beta, c, - ldc) +pub fn chemm(side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_chemm(.row_major, side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] -pub fn zhemm(side Side, uplo bool, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { - C.cblas_zhemm(.row_major, side, c_uplo(uplo), m, n, alpha, a, lda, b, ldb, beta, c, - ldc) +pub fn zhemm(side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_zhemm(.row_major, side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] -pub fn cherk(uplo bool, trans bool, n int, k int, alpha f32, a voidptr, lda int, beta f32, mut c voidptr, ldc int) { - C.cblas_cherk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, - c, ldc) +pub fn cherk(uplo Uplo, trans Transpose, n int, k int, alpha f32, a voidptr, lda int, beta f32, mut c voidptr, ldc int) { + C.cblas_cherk(.row_major, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) } @[inline] -pub fn zherk(uplo bool, trans bool, n int, k int, alpha f64, a voidptr, lda int, beta f64, mut c voidptr, ldc int) { - C.cblas_zherk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, - c, ldc) +pub fn zherk(uplo Uplo, trans Transpose, n int, k int, alpha f64, a voidptr, lda int, beta f64, mut c voidptr, ldc int) { + C.cblas_zherk(.row_major, uplo, trans, n, k, alpha, 
a, lda, beta, c, ldc) } @[inline] -pub fn cher2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f32, mut c voidptr, ldc int) { - C.cblas_cher2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, - beta, c, ldc) +pub fn cher2k(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f32, mut c voidptr, ldc int) { + C.cblas_cher2k(.row_major, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] -pub fn zher2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f64, mut c voidptr, ldc int) { - C.cblas_zher2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, - beta, c, ldc) +pub fn zher2k(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f64, mut c voidptr, ldc int) { + C.cblas_zher2k(.row_major, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] @@ -986,47 +970,45 @@ pub fn zaxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, mut y voi } @[inline] -pub fn somatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { - C.cblas_somatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, - unsafe { &b[0] }, ldb) +pub fn somatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_somatcopy(order, trans, rows, cols, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, + ldb) } @[inline] -pub fn domatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { - C.cblas_domatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, - unsafe { &b[0] }, ldb) +pub fn domatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_domatcopy(order, 
trans, rows, cols, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, + ldb) } @[inline] -pub fn comatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f32, a &f32, lda int, mut b &f32, ldb int) { - C.cblas_comatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, b, ldb) +pub fn comatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha &f32, a &f32, lda int, mut b &f32, ldb int) { + C.cblas_comatcopy(order, trans, rows, cols, alpha, a, lda, b, ldb) } @[inline] -pub fn zomatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f64, a &f64, lda int, mut b &f64, ldb int) { - C.cblas_zomatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, b, ldb) +pub fn zomatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha &f64, a &f64, lda int, mut b &f64, ldb int) { + C.cblas_zomatcopy(order, trans, rows, cols, alpha, a, lda, b, ldb) } @[inline] -pub fn simatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f32, mut a []f32, lda int, ldb int) { - C.cblas_simatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, - ldb) +pub fn simatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha f32, mut a []f32, lda int, ldb int) { + C.cblas_simatcopy(order, trans, rows, cols, alpha, unsafe { &a[0] }, lda, ldb) } @[inline] -pub fn dimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f64, mut a []f64, lda int, ldb int) { - C.cblas_dimatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, - ldb) +pub fn dimatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha f64, mut a []f64, lda int, ldb int) { + C.cblas_dimatcopy(order, trans, rows, cols, alpha, unsafe { &a[0] }, lda, ldb) } @[inline] -pub fn cimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f32, mut a &f32, lda int, ldb int) { - C.cblas_cimatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, ldb) +pub fn cimatcopy(order MemoryLayout, trans Transpose, rows int, cols 
int, alpha &f32, mut a &f32, lda int, ldb int) { + C.cblas_cimatcopy(order, trans, rows, cols, alpha, a, lda, ldb) } @[inline] -pub fn zimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f64, mut a &f64, lda int, ldb int) { - C.cblas_zimatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, ldb) +pub fn zimatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha &f64, mut a &f64, lda int, ldb int) { + C.cblas_zimatcopy(order, trans, rows, cols, alpha, a, lda, ldb) } @[inline] @@ -1052,7 +1034,7 @@ pub fn zgeadd(order MemoryLayout, rows int, cols int, alpha &f64, a &f64, lda in } @[inline] -pub fn dgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { - C.cblas_dgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) +pub fn dgemm(trans_a Transpose, trans_b Transpose, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { + C.cblas_dgemm(.row_major, trans_a, trans_b, m, n, k, alpha, unsafe { &a[0] }, lda, + unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) } diff --git a/blas/oblas_notd_vsl_blas_cblas.v b/blas/oblas_notd_vsl_blas_cblas.v index 8552dde50..f05117128 100644 --- a/blas/oblas_notd_vsl_blas_cblas.v +++ b/blas/oblas_notd_vsl_blas_cblas.v @@ -47,8 +47,8 @@ pub fn dscal(n int, alpha f64, mut x []f64, incx int) { } @[inline] -pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - blas64.dgemv(c_trans(trans), m, n, alpha, a, lda, x, incx, beta, mut y, incy) +pub fn dgemv(trans Transpose, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + blas64.dgemv(trans, m, n, alpha, a, lda, x, incx, beta, mut y, incy) } @[inline] @@ -57,27 +57,26 @@ pub fn dger(m int, n int, alpha f64, x []f64, incx int, y 
[]f64, incy int, mut a } @[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - blas64.dtrsv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) +pub fn dtrsv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtrsv(uplo, trans_a, diag, n, a, lda, mut x, incx) } @[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - blas64.dtrmv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) +pub fn dtrmv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtrmv(uplo, trans_a, diag, n, a, lda, mut x, incx) } @[inline] -pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { - blas64.dsyr(c_uplo(uplo), n, alpha, x, incx, mut a, lda) +pub fn dsyr(uplo Uplo, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { + blas64.dsyr(uplo, n, alpha, x, incx, mut a, lda) } @[inline] -pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - blas64.dsyr2(c_uplo(uplo), n, alpha, x, incx, y, incy, mut a, lda) +pub fn dsyr2(uplo Uplo, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { + blas64.dsyr2(uplo, n, alpha, x, incx, y, incy, mut a, lda) } @[inline] -pub fn dgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { - blas64.dgemm(c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, a, lda, b, ldb, beta, mut - cc, ldc) +pub fn dgemm(trans_a Transpose, trans_b Transpose, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { + blas64.dgemm(trans_a, trans_b, m, n, k, alpha, a, lda, b, ldb, beta, mut cc, ldc) } diff --git a/float/float64/gemv_test.v b/float/float64/gemv_test.v index 34a383ee7..c84060ffd 100644 --- 
a/float/float64/gemv_test.v +++ b/float/float64/gemv_test.v @@ -527,13 +527,13 @@ fn test_gemv() { } } -fn dgemvcomp(mut test DgemvCase, trans bool, case DgemvSubcase) { +fn dgemvcomp(mut test DgemvCase, trans Transpose, case DgemvSubcase) { tol := 1e-15 x_gd_val, y_gd_val, a_gd_val := 0.5, 1.5, 10 gd_ln := 4 - test_x := if trans { test.y } else { test.x } - test_y := if trans { test.x } else { test.y } + test_x := if trans == .trans { test.y } else { test.x } + test_y := if trans == .trans { test.x } else { test.y } mut xg, mut yg := guard_vector(test_x, x_gd_val, gd_ln), guard_vector(test_y, y_gd_val, gd_ln) @@ -543,7 +543,7 @@ fn dgemvcomp(mut test DgemvCase, trans bool, case DgemvSubcase) { lda := u32(test.n) - if trans { + if trans == .trans { gemv_t(u32(test.m), u32(test.n), case.alpha, a, lda, x, 1, case.beta, mut y, 1) } else { gemv_n(u32(test.m), u32(test.n), case.alpha, a, lda, x, 1, case.beta, mut y, 1) @@ -577,7 +577,7 @@ fn dgemvcomp(mut test DgemvCase, trans bool, case DgemvSubcase) { ag = guard_vector(test.a, a_gd_val, gd_ln) a = ag[gd_ln..ag.len - gd_ln] - if trans { + if trans == .trans { gemv_t(u32(test.m), u32(test.n), case.alpha, a, lda, x, u32(inc.x), case.beta, mut y, u32(inc.y)) } else { diff --git a/la/blas.v b/la/blas.v index 03b88060a..d472ad0af 100644 --- a/la/blas.v +++ b/la/blas.v @@ -127,7 +127,7 @@ pub fn matrix_vector_mul[T](alpha T, a &Matrix[T], u []T) []T { } return v } - blas.dgemv(false, a.m, a.n, alpha, a.data, a.n, u, 1, 0.0, mut v, 1) + blas.dgemv(.no_trans, a.m, a.n, alpha, a.data, a.n, u, 1, 0.0, mut v, 1) return v } $else { mut v := []T{len: a.m} @@ -157,7 +157,7 @@ pub fn matrix_tr_vector_mul[T](alpha T, a &Matrix[T], u []T) []T { } return v } - blas.dgemv(true, a.m, a.n, alpha, a.data, a.n, u, 1, 0.0, mut v, 1) + blas.dgemv(.trans, a.m, a.n, alpha, a.data, a.n, u, 1, 0.0, mut v, 1) return v } $else { mut v := []T{len: a.n} @@ -208,7 +208,7 @@ pub fn vector_vector_tr_mul[T](alpha T, u []T, v []T) &Matrix[T] { // pub fn 
matrix_vector_mul_add(alpha f64, a &Matrix[f64], u []f64) []f64 { mut v := []f64{len: a.m} - blas.dgemv(false, a.m, a.n, alpha, a.data, a.m, u, 1, 1.0, mut v, 1) + blas.dgemv(.no_trans, a.m, a.n, alpha, a.data, a.m, u, 1, 1.0, mut v, 1) return v } @@ -228,7 +228,7 @@ pub fn matrix_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix } return } - blas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.m, b.data, b.m, 0.0, mut + blas.dgemm(.no_trans, .no_trans, a.m, b.n, a.n, alpha, a.data, a.m, b.data, b.m, 0.0, mut c.data, c.m) } @@ -248,8 +248,8 @@ pub fn matrix_tr_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat } return } - blas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.m, b.data, b.m, 0.0, mut c.data, - c.m) + blas.dgemm(.trans, .no_trans, a.n, b.n, a.m, alpha, a.data, a.m, b.data, b.m, 0.0, mut + c.data, c.m) } // matrix_matrix_tr_mul returns the matrix multiplication (scaled) with transposed(b) @@ -257,8 +257,8 @@ pub fn matrix_tr_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c := alpha⋅a⋅bᵀ ⇒ cij := alpha * aik * bjk // pub fn matrix_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, - c.m) + blas.dgemm(.no_trans, .trans, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 0.0, mut + c.data, c.m) } // matrix_tr_matrix_tr_mul returns the matrix multiplication (scaled) with transposed(a) and transposed(b) @@ -266,8 +266,8 @@ pub fn matrix_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c := alpha⋅aᵀ⋅bᵀ ⇒ cij := alpha * aki * bjk // pub fn matrix_tr_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, - c.m) + blas.dgemm(.trans, .trans, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut + c.data, c.m) } // matrix_matrix_muladd returns the matrix 
multiplication (scaled) @@ -275,7 +275,7 @@ pub fn matrix_tr_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅a⋅b ⇒ cij += alpha * aik * bkj // pub fn matrix_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut + blas.dgemm(.no_trans, .no_trans, a.m, b.n, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, c.m) } @@ -284,8 +284,8 @@ pub fn matrix_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c += alpha⋅aᵀ⋅b ⇒ cij += alpha * aki * bkj // pub fn matrix_tr_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, - c.m) + blas.dgemm(.trans, .no_trans, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut + c.data, c.m) } // matrix_matrix_tr_muladd returns the matrix multiplication (scaled) with transposed(b) @@ -293,8 +293,8 @@ pub fn matrix_tr_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅a⋅bᵀ ⇒ cij += alpha * aik * bjk // pub fn matrix_matrix_tr_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, - c.m) + blas.dgemm(.no_trans, .trans, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut + c.data, c.m) } // matrix_tr_matrix_tr_mul_add returns the matrix multiplication (scaled) with transposed(a) and transposed(b) @@ -302,8 +302,8 @@ pub fn matrix_matrix_tr_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅aᵀ⋅bᵀ ⇒ cij += alpha * aki * bjk // pub fn matrix_tr_matrix_tr_mul_add(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, - c.m) + blas.dgemm(.trans, .trans, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut + c.data, 
c.m) } // matrix_add adds the scaled components of two matrices diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 5a1dc8d29..01f7968f8 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -79,13 +79,13 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { dlaswp(j, mut slice2, lda, j, j + jb, mut slice_ipiv2, 1) mut slice3 := unsafe { a[j * lda + j + jb..] } - blas.dtrsm(.left, false, false, .unit, jb, n - j - jb, 1, a[j * lda + j..], + blas.dtrsm(.left, .lower, .no_trans, .unit, jb, n - j - jb, 1, a[j * lda + j..], lda, mut slice3, lda) if j + jb < m { mut slice4 := unsafe { a[(j + jb) * lda + j + jb..] } - blas.dgemm(false, false, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], - lda, a[j * lda + j + jb..], lda, 1, mut slice4, lda) + blas.dgemm(.no_trans, .no_trans, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + + j..], lda, a[j * lda + j + jb..], lda, 1, mut slice4, lda) } } } diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v index 9e1424600..fa40169d5 100644 --- a/lapack/lapack64/dgetrs.v +++ b/lapack/lapack64/dgetrs.v @@ -52,15 +52,15 @@ pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, mut i // Solve A * X = B. dlaswp(nrhs, mut b, ldb, 0, n - 1, mut ipiv, 1) // Solve L * X = B, overwriting B with X. - blas.dtrsm(.left, false, false, .unit, n, nrhs, 1, a, lda, mut b, ldb) + blas.dtrsm(.left, .lower, .no_trans, .unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve U * X = B, overwriting B with X. - blas.dtrsm(.left, true, false, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) + blas.dtrsm(.left, .upper, .no_trans, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) } // Solve Aᵀ * X = B. // Solve Uᵀ * X = B, overwriting B with X. - blas.dtrsm(.left, true, true, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) + blas.dtrsm(.left, .upper, .trans, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve Lᵀ * X = B, overwriting B with X. 
- blas.dtrsm(.left, false, true, .unit, n, nrhs, 1, a, lda, mut b, ldb) + blas.dtrsm(.left, .lower, .trans, .unit, n, nrhs, 1, a, lda, mut b, ldb) dlaswp(nrhs, mut b, ldb, 0, n - 1, mut ipiv, -1) } diff --git a/lapack/lapack_d_vsl_lapack_lapacke.v b/lapack/lapack_d_vsl_lapack_lapacke.v index 9bc8314d9..bd96129ae 100644 --- a/lapack/lapack_d_vsl_lapack_lapacke.v +++ b/lapack/lapack_d_vsl_lapack_lapacke.v @@ -47,8 +47,8 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, if ipiv.len != n { errors.vsl_panic('ipiv.len must be equal to n. ${ipiv.len} != ${n}\n', .efailed) } - info := C.LAPACKE_dgesv(.row_major, n, nrhs, unsafe { &a[0] }, lda, unsafe { &ipiv[0] }, unsafe { &b[0] }, - ldb) + info := C.LAPACKE_dgesv(.row_major, n, nrhs, unsafe { &a[0] }, lda, unsafe { &ipiv[0] }, + unsafe { &b[0] }, ldb) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } @@ -141,9 +141,9 @@ pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { // where U is an upper triangular matrix and L is lower triangular. // // This is the block version of the algorithm, calling Level 3 BLAS. 
-pub fn dpotrf(uplo bool, n int, mut a []f64, lda int) { +pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { unsafe { - info := C.LAPACKE_dpotrf(.row_major, blas.c_uplo(uplo), n, &a[0], lda) + info := C.LAPACKE_dpotrf(.row_major, uplo, n, &a[0], lda) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } From c66401ffaacc74a8c1ab1b281215f88688b3eff6 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:35:14 -0300 Subject: [PATCH 21/33] refactor: Update gemv_test.v to use named arguments in dgemvcomp calls --- float/float64/gemv_test.v | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/float/float64/gemv_test.v b/float/float64/gemv_test.v index c84060ffd..bd2749b64 100644 --- a/float/float64/gemv_test.v +++ b/float/float64/gemv_test.v @@ -518,11 +518,11 @@ struct DgemvSubcase { fn test_gemv() { for mut test in float64.dgemv_tests { for case in test.no_trans { - dgemvcomp(mut test, false, case) + dgemvcomp(mut test, .no_trans, case) } for case in test.trans { - dgemvcomp(mut test, true, case) + dgemvcomp(mut test, .trans, case) } } } From d399e1a9d44d11acdb07d7652728c42b94911da8 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:37:17 -0300 Subject: [PATCH 22/33] refactor: Update gemv_test.v to use named arguments in dgemvcomp calls --- float/float64/gemv_test.v | 1 + 1 file changed, 1 insertion(+) diff --git a/float/float64/gemv_test.v b/float/float64/gemv_test.v index bd2749b64..efe56858c 100644 --- a/float/float64/gemv_test.v +++ b/float/float64/gemv_test.v @@ -1,6 +1,7 @@ module float64 import math +import vsl.blas const dgemv_tests = [ DgemvCase{ // 1x1 From 1d5e441f6a63470226ebf237b602d9a9851f471f Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:38:35 -0300 Subject: [PATCH 23/33] refactor: Update gemv_test.v to use named arguments in dgemvcomp calls --- float/float64/gemv_test.v | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git 
a/float/float64/gemv_test.v b/float/float64/gemv_test.v index efe56858c..34a383ee7 100644 --- a/float/float64/gemv_test.v +++ b/float/float64/gemv_test.v @@ -1,7 +1,6 @@ module float64 import math -import vsl.blas const dgemv_tests = [ DgemvCase{ // 1x1 @@ -519,22 +518,22 @@ struct DgemvSubcase { fn test_gemv() { for mut test in float64.dgemv_tests { for case in test.no_trans { - dgemvcomp(mut test, .no_trans, case) + dgemvcomp(mut test, false, case) } for case in test.trans { - dgemvcomp(mut test, .trans, case) + dgemvcomp(mut test, true, case) } } } -fn dgemvcomp(mut test DgemvCase, trans Transpose, case DgemvSubcase) { +fn dgemvcomp(mut test DgemvCase, trans bool, case DgemvSubcase) { tol := 1e-15 x_gd_val, y_gd_val, a_gd_val := 0.5, 1.5, 10 gd_ln := 4 - test_x := if trans == .trans { test.y } else { test.x } - test_y := if trans == .trans { test.x } else { test.y } + test_x := if trans { test.y } else { test.x } + test_y := if trans { test.x } else { test.y } mut xg, mut yg := guard_vector(test_x, x_gd_val, gd_ln), guard_vector(test_y, y_gd_val, gd_ln) @@ -544,7 +543,7 @@ fn dgemvcomp(mut test DgemvCase, trans Transpose, case DgemvSubcase) { lda := u32(test.n) - if trans == .trans { + if trans { gemv_t(u32(test.m), u32(test.n), case.alpha, a, lda, x, 1, case.beta, mut y, 1) } else { gemv_n(u32(test.m), u32(test.n), case.alpha, a, lda, x, 1, case.beta, mut y, 1) @@ -578,7 +577,7 @@ fn dgemvcomp(mut test DgemvCase, trans Transpose, case DgemvSubcase) { ag = guard_vector(test.a, a_gd_val, gd_ln) a = ag[gd_ln..ag.len - gd_ln] - if trans == .trans { + if trans { gemv_t(u32(test.m), u32(test.n), case.alpha, a, lda, x, u32(inc.x), case.beta, mut y, u32(inc.y)) } else { From 9f46519d6e0031e248977d5bd3bae10ad8b62285 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sat, 22 Jun 2024 02:50:40 -0300 Subject: [PATCH 24/33] refactor: Update conversions.v, dgetf2.v, dsyev.v, and lapack_notd_vsl_lapack_lapacke.v - Add functions uplo_from_bool and uplo_to_bool to 
conversions.v - Remove unused dlamch_s function from dgetf2.v - Update dsyev.v to include additional error handling and scaling of matrix - Update dpotrf function in lapack_notd_vsl_lapack_lapacke.v to use uplo_from_bool for uplo parameter --- blas/conversions.v | 10 ++ lapack/lapack64/dgetf2.v | 8 +- lapack/lapack64/dlansy.v | 122 ++++++++++++++++++++++++ lapack/lapack64/dlassq.v | 120 +++++++++++++++++++++++ lapack/lapack64/dsyev.v | 108 ++++++++++++++++----- lapack/lapack64/lapack64.v | 50 ++++++++++ lapack/lapack_notd_vsl_lapack_lapacke.v | 4 +- 7 files changed, 391 insertions(+), 31 deletions(-) create mode 100644 lapack/lapack64/dlansy.v create mode 100644 lapack/lapack64/dlassq.v create mode 100644 lapack/lapack64/lapack64.v diff --git a/blas/conversions.v b/blas/conversions.v index b421fa60c..18ace678c 100644 --- a/blas/conversions.v +++ b/blas/conversions.v @@ -21,6 +21,16 @@ pub type Diagonal = blas64.Diagonal // Side is used to specify whether a matrix is on the left or right side in a matrix-matrix multiplication. pub type Side = blas64.Side +// uplo_from_bool converts a boolean to Uplo. +pub fn uplo_from_bool(uplo bool) Uplo { + return if uplo { .upper } else { .lower } +} + +// uplo_to_bool converts Uplo to a boolean. +pub fn uplo_to_bool(uplo Uplo) bool { + return uplo == .upper +} + // slice_to_col_major converts nested slice into an array representing a col-major matrix // // _**NOTE**: make sure to have at least 1x1 item_ diff --git a/lapack/lapack64/dgetf2.v b/lapack/lapack64/dgetf2.v index 2748c78d7..9aa920500 100644 --- a/lapack/lapack64/dgetf2.v +++ b/lapack/lapack64/dgetf2.v @@ -24,7 +24,7 @@ pub fn dgetf2(m int, n int, mut a []f64, lda int, mut ipiv []int) { panic(bad_len_ipiv) } - sfmin := dlamch_s() + sfmin := dlamch_s for j := 0; j < mn; j++ { // Find a pivot and test for singularity. 
@@ -58,9 +58,3 @@ pub fn dgetf2(m int, n int, mut a []f64, lda int, mut ipiv []int) { } } } - -fn dlamch_s() f64 { - // Returns the safe minimum value (sfmin). - // This value is used as a threshold for detecting small values in the matrix. - return math.ldexp(1.0, -1022) // Smallest positive normal number. -} diff --git a/lapack/lapack64/dlansy.v b/lapack/lapack64/dlansy.v new file mode 100644 index 000000000..04dd3284f --- /dev/null +++ b/lapack/lapack64/dlansy.v @@ -0,0 +1,122 @@ +module lapack64 + +import math +import vsl.blas + +// dlansy returns the value of the specified norm of an n×n symmetric matrix. +// If norm == MatrixNorm.max_column_sum or norm == MatrixNorm.max_row_sum, work must have length +// at least n, otherwise work is unused. +pub fn dlansy(norm MatrixNorm, uplo blas.Uplo, n int, a []f64, lda int, mut work []f64) f64 { + if norm != .max_row_sum && norm != .max_column_sum && norm != .frobenius && norm != .max_abs { + panic(lapack64.bad_norm) + } + if uplo != .upper && uplo != .lower { + panic(lapack64.bad_uplo) + } + if n < 0 { + panic('lapack: n < 0') + } + if lda < math.max(1, n) { + panic(lapack64.bad_ld_a) + } + + // Quick return if possible. + if n == 0 { + return 0.0 + } + + if a.len < (n-1) * lda + n { + panic(lapack64.short_a) + } + if (norm == .max_column_sum || norm == .max_row_sum) && work.len < n { + panic(lapack64.short_work) + } + + match norm { + .max_abs { + if uplo == .upper { + mut max := 0.0 + for i in 0 .. n { + for j in i .. n { + v := math.abs(a[i * lda + j]) + if math.is_nan(v) { + return math.nan() + } + if v > max { + max = v + } + } + } + return max + } + mut max := 0.0 + for i in 0 .. n { + for j in 0 .. i + 1 { + v := math.abs(a[i * lda + j]) + if math.is_nan(v) { + return math.nan() + } + if v > max { + max = v + } + } + } + return max + } + .max_row_sum, .max_column_sum { + // A symmetric matrix has the same 1-norm and ∞-norm. + for i in 0 .. n { + work[i] = 0.0 + } + if uplo == .upper { + for i in 0 .. 
n { + work[i] += math.abs(a[i * lda + i]) + for j in i + 1 .. n { + v := math.abs(a[i * lda + j]) + work[i] += v + work[j] += v + } + } + } else { + for i in 0 .. n { + for j in 0 .. i { + v := math.abs(a[i * lda + j]) + work[i] += v + work[j] += v + } + work[i] += math.abs(a[i * lda + i]) + } + } + mut max := 0.0 + for i in 0 .. n { + v := work[i] + if math.is_nan(v) { + return math.nan() + } + if v > max { + max = v + } + } + return max + } + else { + // blas.frobenius: + mut scale := 0.0 + mut sum := 1.0 + // Sum off-diagonals. + if uplo == .upper { + for i in 0 .. n - 1 { + scale, sum = dlassq(n - i - 1, a[i * lda + i + 1..], 1, scale, sum) + } + } else { + for i in 1 .. n { + scale, sum = dlassq(i, a[i * lda..], 1, scale, sum) + } + } + sum *= 2.0 + // Sum diagonal. + scale, sum = dlassq(n, a, lda + 1, scale, sum) + return scale * math.sqrt(sum) + } + } +} diff --git a/lapack/lapack64/dlassq.v b/lapack/lapack64/dlassq.v new file mode 100644 index 000000000..c29d74dc0 --- /dev/null +++ b/lapack/lapack64/dlassq.v @@ -0,0 +1,120 @@ +module lapack64 + +import math + +// dlassq updates a sum of squares represented in scaled form. It returns +// the values scl and smsq such that +// +// scl^2*smsq = X[0]^2 + ... + X[n-1]^2 + scale^2*sumsq +// +// The value of sumsq is assumed to be non-negative. 
+pub fn dlassq(n int, x []f64, incx int, scale f64, sumsq f64) (f64, f64) { + if n < 0 { + panic('lapack: n < 0') + } + if incx <= 0 { + panic('lapack: increment not one or negative one') + } + if x.len < 1 + (n - 1) * incx { + panic('lapack: insufficient length of x') + } + + if math.is_nan(scale) || math.is_nan(sumsq) { + return scale, sumsq + } + + mut scl := scale + mut smsq := sumsq + + if smsq == 0.0 { + scl = 1.0 + } + if scl == 0.0 { + scl = 1.0 + smsq = 0.0 + } + + if n == 0 { + return scl, smsq + } + + // Compute the sum of squares in 3 accumulators: + // - abig: sum of squares scaled down to avoid overflow + // - asml: sum of squares scaled up to avoid underflow + // - amed: sum of squares that do not require scaling + // The thresholds and multipliers are: + // - values bigger than dtbig are scaled down by dsbig + // - values smaller than dtsml are scaled up by dssml + mut is_big := false + mut asml, mut amed, mut abig := 0.0, 0.0, 0.0 + mut ix := 0 + for _ in 0 .. n { + mut ax := math.abs(x[ix]) + if ax > dtbig { + ax *= dsbig + abig += ax * ax + is_big = true + } else if ax < dtsml { + if !is_big { + ax *= dssml + asml += ax * ax + } + } else { + amed += ax * ax + } + ix += incx + } + // Put the existing sum of squares into one of the accumulators. + if smsq > 0.0 { + ax := scl * math.sqrt(smsq) + if ax > dtbig { + if scl > 1.0 { + scl *= dsbig + abig += scl * scl * smsq + } else { + // sumsq > dtbig^2 => (dsbig * (dsbig * sumsq)) is representable. + abig += scl * scl * dsbig * dsbig * smsq + } + } else if ax < dtsml { + if !is_big { + if scl < 1.0 { + scl *= dssml + asml += scl * scl * smsq + } else { + // sumsq < dtsml^2 => (dssml * (dssml * sumsq)) is representable. + asml += scl * scl * dssml * dssml * smsq + } + } + } else { + amed += scl * scl * smsq + } + } + // Combine abig and amed or amed and asml if more than one accumulator was used. 
+ if abig > 0.0 { + // Combine abig and amed: + if amed > 0.0 || math.is_nan(amed) { + abig += amed * dsbig * dsbig + } + scl = 1.0 / dsbig + smsq = abig + } else if asml > 0.0 { + // Combine amed and asml: + if amed > 0.0 || math.is_nan(amed) { + amed = math.sqrt(amed) + asml = math.sqrt(asml) / dssml + mut ymin, mut ymax := asml, amed + if asml > amed { + ymin, ymax = amed, asml + } + scl = 1.0 + smsq = ymax * ymax * (1.0 + (ymin / ymax) * (ymin / ymax)) + } else { + scl = 1.0 / dssml + smsq = asml + } + } else { + scl = 1.0 + smsq = amed + } + return scl, smsq +} diff --git a/lapack/lapack64/dsyev.v b/lapack/lapack64/dsyev.v index a48998d2a..a65f019fa 100644 --- a/lapack/lapack64/dsyev.v +++ b/lapack/lapack64/dsyev.v @@ -3,36 +3,100 @@ module lapack64 import math import vsl.blas -// dsyev computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. -pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, w []f64) int { +pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, mut w []f64, mut work []f64, lwork int) { + if jobz != .ev_none && jobz != .ev_compute { + panic(bad_ev_job) + } + if uplo != .upper && uplo != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if lwork < math.max(1, 3 * n - 1) && lwork != -1 { + panic(bad_l_work) + } + if work.len < math.max(1, lwork) { + panic(short_work) + } + + // Quick return if possible. 
if n == 0 { - return 0 + return } - mut info := 0 - if jobz != .ev_none && jobz != .ev_compute { - info = -1 - } else if uplo != .upper && uplo != .lower { - info = -2 - } else if n < 0 { - info = -3 - } else if lda < math.max(1, n) { - info = -5 + opts := if uplo == .upper { 'U' } else { 'L' } + nb := ilaenv(1, 'DSYTRD', opts, n, -1, -1, -1) + lworkopt := math.max(1, (nb + 2) * n) + if lwork == -1 { + work[0] = f64(lworkopt) + return } - if info != 0 { - return info + if a.len < (n - 1) * lda + n { + panic(short_a) + } + if w.len < n { + panic(short_w) } - // Quick return if possible - if n == 0 { - return 0 + if n == 1 { + w[0] = a[0] + work[0] = 2 + if jobz == .ev_compute { + a[0] = 1 + } + return + } + + safmin := dlamch_s + eps := dlamch_p + smlnum := safmin / eps + bignum := 1 / smlnum + rmin := math.sqrt(smlnum) + rmax := math.sqrt(bignum) + + // Scale matrix to allowable range, if necessary. + anrm := dlansy(.max_abs, uplo, n, a, lda, mut work) + mut scaled := false + mut sigma := f64(0) + if anrm > 0 && anrm < rmin { + scaled = true + sigma = rmin / anrm + } else if anrm > rmax { + scaled = true + sigma = rmax / anrm + } + if scaled { + kind := if uplo == .upper { MatrixType.upper_tri } else { MatrixType.lower_tri } + dlascl(kind, 0, 0, 1, sigma, n, n, mut a, lda) } + inde := 0 + indtau := inde + n + indwork := indtau + n + llwork := lwork - indwork + dsytrd(uplo, n, mut a, lda, mut w, mut work[inde..], mut work[indtau..], mut work[indwork..], + llwork) - // Call the relevant LAPACK functions - // (Here we would call the internal implementations like dsytrd, dorgtr, dormtr, etc.) + // For eigenvalues only, call Dsterf. For eigenvectors, first call Dorgtr + // to generate the orthogonal matrix, then call Dsteqr. 
+ if jobz == .ev_none { + if !dsterf(n, mut w, mut work[inde..]) { + panic('Dsterf failed') + } + } else { + dorgtr(uplo, n, mut a, lda, mut work[indtau..], mut work[indwork..], llwork) + if !dsteqr(EvComp(jobz), n, mut w, mut work[inde..], mut a, lda, mut work[indtau..]) { + panic('Dsteqr failed') + } + } - // Placeholder for the actual LAPACK function calls - // Example: info = dsytrd(uplo, n, a, lda, w, work, lwork) - return info + // If the matrix was scaled, then rescale eigenvalues appropriately. + if scaled { + blas.dscal(n, 1 / sigma, mut w, 1) + } + work[0] = f64(lworkopt) } diff --git a/lapack/lapack64/lapack64.v b/lapack/lapack64/lapack64.v new file mode 100644 index 000000000..819fe4fe2 --- /dev/null +++ b/lapack/lapack64/lapack64.v @@ -0,0 +1,50 @@ +module lapack64 + +/// dlamch_e is the machine epsilon. For IEEE this is 2^{-53}. +const dlamch_e = 1.1102230246251565e-16 // 2^-53 + +// dlamch_b is the radix of the machine (the base of the number system). +const dlamch_b = 2.0 + +// dlamch_p is base * eps. +const dlamch_p = dlamch_b * dlamch_e + +// dlamch_s is the "safe minimum", that is, the lowest number such that +// 1/dlamch_s does not overflow, or also the smallest normal number. +// For IEEE this is 2^{-1022}. +const dlamch_s = 2.2250738585072014e-308 // 2^-1022 + +// Blue's scaling constants +// +// An n-vector x is well-scaled if +// dtsml ≤ |xᵢ| ≤ dtbig for 0 ≤ i < n and n ≤ 1/dlamch_p, +// where +// dtsml = 2^ceil((expmin-1)/2) = 2^ceil((-1021-1)/2) = 2^{-511} = 1.4916681462400413e-154 +// dtbig = 2^floor((expmax-digits+1)/2) = 2^floor((1024-53+1)/2) = 2^{486} = 1.997919072202235e+146 +// If any xᵢ is not well-scaled, then multiplying small values by dssml and +// large values by dsbig avoids underflow or overflow when computing the sum +// of squares \sum_0^{n-1} (xᵢ)². 
+// dssml = 2^{-floor((expmin-digits)/2)} = 2^{-floor((-1021-53)/2)} = 2^537 = 4.4989137945431964e+161 +// dsbig = 2^{-ceil((expmax+digits-1)/2)} = 2^{-ceil((1024+53-1)/2)} = 2^{-538} = 1.1113793747425387e-162 +// +// References: +// - Anderson E. (2017) +// Algorithm 978: Safe Scaling in the Level 1 BLAS +// ACM Trans Math Softw 44:1--28 +// https://doi.org/10.1145/3061665 +// - Blue, James L. (1978) +// A Portable Fortran Program to Find the Euclidean Norm of a Vector +// ACM Trans Math Softw 4:15--23 +// https://doi.org/10.1145/355769.355771 + +// dtsml constant +const dtsml = 1.4916681462400413e-154 // 2^-511 + +// dtbig constant +const dtbig = 1.997919072202235e+146 // 2^486 + +// dssml constant +const dssml = 4.4989137945431964e+161 // 2^537 + +// dsbig constant +const dsbig = 1.1113793747425387e-162 // 2^-538 diff --git a/lapack/lapack_notd_vsl_lapack_lapacke.v b/lapack/lapack_notd_vsl_lapack_lapacke.v index 2730f4ef6..7719a8c53 100644 --- a/lapack/lapack_notd_vsl_lapack_lapacke.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -111,8 +111,8 @@ pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { // where U is an upper triangular matrix and L is lower triangular. // // This is the block version of the algorithm, calling Level 3 BLAS. 
-pub fn dpotrf(up bool, n int, mut a []f64, lda int) { - info := lapack64.dpotrf(blas.c_uplo(up), n, mut a, lda) +pub fn dpotrf(uplo bool, n int, mut a []f64, lda int) { + info := lapack64.dpotrf(blas.uplo_from_bool(uplo), n, mut a, lda) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } From d376d36eba8dddab6abd0cb4bae37da27bdffbc3 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sat, 22 Jun 2024 02:56:21 -0300 Subject: [PATCH 25/33] refactor: Update dlansy.v to use named constants for error messages --- lapack/lapack64/dlansy.v | 12 ++--- lapack/lapack64/dlascl.v | 108 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 6 deletions(-) create mode 100644 lapack/lapack64/dlascl.v diff --git a/lapack/lapack64/dlansy.v b/lapack/lapack64/dlansy.v index 04dd3284f..556c5d150 100644 --- a/lapack/lapack64/dlansy.v +++ b/lapack/lapack64/dlansy.v @@ -8,16 +8,16 @@ import vsl.blas // at least n, otherwise work is unused. pub fn dlansy(norm MatrixNorm, uplo blas.Uplo, n int, a []f64, lda int, mut work []f64) f64 { if norm != .max_row_sum && norm != .max_column_sum && norm != .frobenius && norm != .max_abs { - panic(lapack64.bad_norm) + panic(bad_norm) } if uplo != .upper && uplo != .lower { - panic(lapack64.bad_uplo) + panic(bad_uplo) } if n < 0 { panic('lapack: n < 0') } if lda < math.max(1, n) { - panic(lapack64.bad_ld_a) + panic(bad_ld_a) } // Quick return if possible. 
@@ -25,11 +25,11 @@ pub fn dlansy(norm MatrixNorm, uplo blas.Uplo, n int, a []f64, lda int, mut work
 		return 0.0
 	}
 
-	if a.len < (n-1) * lda + n {
-		panic(lapack64.short_a)
+	if a.len < (n - 1) * lda + n {
+		panic(short_a)
 	}
 	if (norm == .max_column_sum || norm == .max_row_sum) && work.len < n {
-		panic(lapack64.short_work)
+		panic(short_work)
 	}
 
 	match norm {
diff --git a/lapack/lapack64/dlascl.v b/lapack/lapack64/dlascl.v
new file mode 100644
index 000000000..35f4c1294
--- /dev/null
+++ b/lapack/lapack64/dlascl.v
@@ -0,0 +1,108 @@
+module lapack64
+
+import math
+
+// dlascl multiplies an m×n matrix by the scalar cto/cfrom.
+//
+// cfrom must not be zero, and cto and cfrom must not be NaN, otherwise dlascl
+// will panic.
+//
+// The scaling is performed without over/underflow as long as the final
+// result cto*A(i,j)/cfrom does not over/underflow (see LAPACK dlascl).
+//
+// dlascl is an internal routine. It is exported for testing purposes.
+pub fn dlascl(kind MatrixType, kl int, ku int, cfrom f64, cto f64, m int, n int, mut a []f64, lda int) {
+	match kind {
+		.general, .upper_tri, .lower_tri {
+			if lda < math.max(1, n) {
+				panic(bad_ld_a)
+			}
+		}
+	}
+	if cfrom == 0.0 {
+		panic(zero_c_from)
+	}
+	if math.is_nan(cfrom) {
+		panic(nan_c_from)
+	}
+	if math.is_nan(cto) {
+		panic(nan_c_to)
+	}
+	if m < 0 {
+		panic(m_lt0)
+	}
+	if n < 0 {
+		panic(n_lt0)
+	}
+
+	if n == 0 || m == 0 {
+		return
+	}
+
+	match kind {
+		.general, .upper_tri, .lower_tri {
+			if a.len < (m - 1) * lda + n {
+				panic(short_a)
+			}
+		}
+	}
+
+	smlnum := dlamch_s
+	bignum := 1.0 / smlnum
+	mut cfromc := cfrom
+	mut ctoc := cto
+	for {
+		// Recompute on every pass: cfromc/ctoc are updated below whenever an
+		// intermediate scaling step is taken (LAPACK dlascl "label 10" loop).
+		// Hoisting this out of the loop would leave cfrom1 stale after
+		// `cfromc = cfrom1`, spuriously triggering the "cfromc is inf" branch
+		// and defeating the overflow/underflow-safe stepping.
+		cfrom1 := cfromc * smlnum
+		mut done := false
+		mut mul := 0.0
+		mut ctol := 0.0
+		if cfrom1 == cfromc {
+			// cfromc is inf.
+			mul = ctoc / cfromc
+			done = true
+			ctol = ctoc
+		} else {
+			ctol = ctoc / bignum
+			if ctol == ctoc {
+				// ctoc is either 0 or inf.
+ mul = ctoc + done = true + cfromc = 1.0 + } else if math.abs(cfrom1) > math.abs(ctoc) && ctoc != 0.0 { + mul = smlnum + done = false + cfromc = cfrom1 + } else if math.abs(ctol) > math.abs(cfromc) { + mul = bignum + done = false + ctoc = ctol + } else { + mul = ctoc / cfromc + done = true + } + } + match kind { + .general { + for i in 0 .. m { + for j in 0 .. n { + a[i * lda + j] *= mul + } + } + } + .upper_tri { + for i in 0 .. m { + for j in i .. n { + a[i * lda + j] *= mul + } + } + } + .lower_tri { + for i in 0 .. m { + for j in 0 .. math.min(i + 1, n) { + a[i * lda + j] *= mul + } + } + } + } + if done { + break + } + } +} From 99a3a2b28886bbe6199cf72db7cbf9e871178034 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sat, 22 Jun 2024 04:43:00 -0300 Subject: [PATCH 26/33] refactor: Update dpotrf function in lapack_notd_vsl_lapack_lapacke.v to use named constant for uplo parameter --- blas/conversions.v | 10 - lapack/lapack64/dlae2.v | 47 ++++ lapack/lapack64/dlanst.v | 68 ++++++ lapack/lapack64/dlapy2.v | 10 + lapack/lapack64/dlasrt.v | 28 +++ lapack/lapack64/dorg2l.v | 76 +++++++ lapack/lapack64/dorgql.v | 140 ++++++++++++ lapack/lapack64/dorgtr.v | 107 +++++++++ lapack/lapack64/dsterf.v | 280 ++++++++++++++++++++++++ lapack/lapack64/dsytrd.v | 185 ++++++++++++++++ lapack/lapack_notd_vsl_lapack_lapacke.v | 4 +- 11 files changed, 943 insertions(+), 12 deletions(-) create mode 100644 lapack/lapack64/dlae2.v create mode 100644 lapack/lapack64/dlanst.v create mode 100644 lapack/lapack64/dlapy2.v create mode 100644 lapack/lapack64/dlasrt.v create mode 100644 lapack/lapack64/dorg2l.v create mode 100644 lapack/lapack64/dorgql.v create mode 100644 lapack/lapack64/dorgtr.v create mode 100644 lapack/lapack64/dsterf.v create mode 100644 lapack/lapack64/dsytrd.v diff --git a/blas/conversions.v b/blas/conversions.v index 18ace678c..b421fa60c 100644 --- a/blas/conversions.v +++ b/blas/conversions.v @@ -21,16 +21,6 @@ pub type Diagonal = blas64.Diagonal // Side is used 
to specify whether a matrix is on the left or right side in a matrix-matrix multiplication. pub type Side = blas64.Side -// uplo_from_bool converts a boolean to Uplo. -pub fn uplo_from_bool(uplo bool) Uplo { - return if uplo { .upper } else { .lower } -} - -// uplo_to_bool converts Uplo to a boolean. -pub fn uplo_to_bool(uplo Uplo) bool { - return uplo == .upper -} - // slice_to_col_major converts nested slice into an array representing a col-major matrix // // _**NOTE**: make sure to have at least 1x1 item_ diff --git a/lapack/lapack64/dlae2.v b/lapack/lapack64/dlae2.v new file mode 100644 index 000000000..59111f8ed --- /dev/null +++ b/lapack/lapack64/dlae2.v @@ -0,0 +1,47 @@ +module lapack64 + +import math + +// dlae2 computes the eigenvalues of a 2×2 symmetric matrix +// +// [a b] +// [b c] +// +// and returns the eigenvalue with the larger absolute value as rt1 and the +// smaller as rt2. +// +// dlae2 is an internal routine. It is exported for testing purposes. +pub fn dlae2(a f64, b f64, c f64) (f64, f64) { + sm := a + c + df := a - c + adf := math.abs(df) + tb := b + b + ab := math.abs(tb) + mut acmx := c + mut acmn := a + if math.abs(a) > math.abs(c) { + acmx = a + acmn = c + } + mut rt := 0.0 + if adf > ab { + rt = adf * math.sqrt(1.0 + (ab / adf) * (ab / adf)) + } else if adf < ab { + rt = ab * math.sqrt(1.0 + (adf / ab) * (adf / ab)) + } else { + rt = ab * math.sqrt(2.0) + } + mut rt1 := 0.0 + mut rt2 := 0.0 + if sm < 0 { + rt1 = 0.5 * (sm - rt) + rt2 = (acmx / rt1) * acmn - (b / rt1) * b + } else if sm > 0 { + rt1 = 0.5 * (sm + rt) + rt2 = (acmx / rt1) * acmn - (b / rt1) * b + } else { + rt1 = 0.5 * rt + rt2 = -0.5 * rt + } + return rt1, rt2 +} diff --git a/lapack/lapack64/dlanst.v b/lapack/lapack64/dlanst.v new file mode 100644 index 000000000..24cb91461 --- /dev/null +++ b/lapack/lapack64/dlanst.v @@ -0,0 +1,68 @@ +module lapack64 + +import math + +// dlanst computes the specified norm of a symmetric tridiagonal matrix A. 
+// The diagonal elements of A are stored in d and the off-diagonal elements +// are stored in e. +pub fn dlanst(norm MatrixNorm, n int, d []f64, e []f64) f64 { + if norm != .max_row_sum && norm != .max_column_sum && norm != .frobenius && norm != .max_abs { + panic(bad_norm) + } + if n < 0 { + panic(n_lt0) + } + if n == 0 { + return 0.0 + } + if d.len < n { + panic(short_d) + } + if e.len < n - 1 { + panic(short_e) + } + + match norm { + .max_abs { + mut anorm := math.abs(d[n - 1]) + for i in 0 .. n - 1 { + mut sum := math.abs(d[i]) + if anorm < sum || math.is_nan(sum) { + anorm = sum + } + sum = math.abs(e[i]) + if anorm < sum || math.is_nan(sum) { + anorm = sum + } + } + return anorm + } + .max_row_sum, .max_column_sum { + if n == 1 { + return math.abs(d[0]) + } + mut anorm := math.abs(d[0]) + math.abs(e[0]) + mut sum := math.abs(e[n - 2]) + math.abs(d[n - 1]) + if anorm < sum || math.is_nan(sum) { + anorm = sum + } + for i in 1 .. n - 1 { + sum = math.abs(d[i]) + math.abs(e[i]) + math.abs(e[i - 1]) + if anorm < sum || math.is_nan(sum) { + anorm = sum + } + } + return anorm + } + .frobenius { + mut scale := 0.0 + mut sum := 1.0 + if n > 1 { + scale, sum = dlassq(n - 1, e, 1, scale, sum) + sum = 2 * sum + } + scale, sum = dlassq(n, d, 1, scale, sum) + return scale * math.sqrt(sum) + } + } +} diff --git a/lapack/lapack64/dlapy2.v b/lapack/lapack64/dlapy2.v new file mode 100644 index 000000000..f5b055d3a --- /dev/null +++ b/lapack/lapack64/dlapy2.v @@ -0,0 +1,10 @@ +module lapack64 + +import math + +// dlapy2 is the LAPACK version of math.hypot. +// +// dlapy2 is an internal routine. It is exported for testing purposes. +pub fn dlapy2(x f64, y f64) f64 { + return math.hypot(x, y) +} diff --git a/lapack/lapack64/dlasrt.v b/lapack/lapack64/dlasrt.v new file mode 100644 index 000000000..39b401b33 --- /dev/null +++ b/lapack/lapack64/dlasrt.v @@ -0,0 +1,28 @@ +module lapack64 + +import math + +// dlasrt sorts the numbers in the input slice d. 
If s == .increasing, +// the elements are sorted in increasing order. If s == .decreasing, +// the elements are sorted in decreasing order. For other values of s dlasrt +// will panic. +// +// dlasrt is an internal routine. It is exported for testing purposes. +pub fn dlasrt(s Sort, n int, mut d []f64) { + if n < 0 { + panic(n_lt0) + } + if d.len < n { + panic(short_d) + } + + d = unsafe { d[..n] } + match s { + .sort_increasing { + d.sort() + } + .sort_decreasing { + d.sort(b < a) + } + } +} diff --git a/lapack/lapack64/dorg2l.v b/lapack/lapack64/dorg2l.v new file mode 100644 index 000000000..2c2070c79 --- /dev/null +++ b/lapack/lapack64/dorg2l.v @@ -0,0 +1,76 @@ +module lapack64 + +import math +import vsl.blas + +// dorg2l generates an m×n matrix Q with orthonormal columns which is defined +// as the last n columns of a product of k elementary reflectors of order m. +// +// Q = H_{k-1} * ... * H_1 * H_0 +// +// See dgelqf for more information. It must be that m >= n >= k. +// +// tau contains the scalar reflectors computed by dgeqlf. tau must have length +// at least k, and dorg2l will panic otherwise. +// +// work contains temporary memory, and must have length at least n. dorg2l will +// panic otherwise. +// +// dorg2l is an internal routine. It is exported for testing purposes. +pub fn dorg2l(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f64) { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if n > m { + panic(n_gtm) + } + if k < 0 { + panic(k_lt0) + } + if k > n { + panic(k_gtn) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + if n == 0 { + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + if tau.len < k { + panic(short_tau) + } + if work.len < n { + panic(short_work) + } + + // Initialize columns 0:n-k to columns of the unit matrix. 
+ for j := 0; j < n - k; j++ { + for l := 0; l < m; l++ { + a[l * lda + j] = 0 + } + a[(m - n + j) * lda + j] = 1 + } + + for i := 0; i < k; i++ { + ii := n - k + i + + // Apply H_i to A[0:m-k+i, 0:n-k+i] from the left. + a[(m - n + ii) * lda + ii] = 1 + dlarf(.left, m - n + ii + 1, ii, mut a[ii..], lda, tau[i], mut a, lda, mut work) + blas.dscal(m - n + ii, -tau[i], mut a[ii..], lda) + a[(m - n + ii) * lda + ii] = 1 - tau[i] + + // Set A[m-k+i:m, n-k+i+1] to zero. + for l := m - n + ii + 1; l < m; l++ { + a[l * lda + ii] = 0 + } + } +} diff --git a/lapack/lapack64/dorgql.v b/lapack/lapack64/dorgql.v new file mode 100644 index 000000000..b16636e57 --- /dev/null +++ b/lapack/lapack64/dorgql.v @@ -0,0 +1,140 @@ +module lapack64 + +import math +import vsl.blas + +// dorgql generates the m×n matrix Q with orthonormal columns defined as the +// last n columns of a product of k elementary reflectors of order m +// +// Q = H_{k-1} * ... * H_1 * H_0. +// +// It must hold that +// +// 0 <= k <= n <= m, +// +// and dorgql will panic otherwise. +// +// On entry, the (n-k+i)-th column of A must contain the vector which defines +// the elementary reflector H_i, for i=0,...,k-1, and tau[i] must contain its +// scalar factor. On return, a contains the m×n matrix Q. +// +// tau must have length at least k, and dorgql will panic otherwise. +// +// work must have length at least max(1,lwork), and lwork must be at least +// max(1,n), otherwise dorgql will panic. For optimum performance lwork must +// be a sufficiently large multiple of n. +// +// If lwork == -1, instead of computing dorgql the optimal work length is stored +// into work[0]. +// +// dorgql is an internal routine. It is exported for testing purposes. 
+pub fn dorgql(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f64, lwork int) { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if n > m { + panic(n_gtm) + } + if k < 0 { + panic(k_lt0) + } + if k > n { + panic(k_gtn) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if lwork < math.max(1, n) && lwork != -1 { + panic(bad_l_work) + } + if work.len < math.max(1, lwork) { + panic(short_work) + } + + // Quick return if possible. + if n == 0 { + work[0] = 1 + return + } + + mut nb := ilaenv(1, 'DORGQL', ' ', m, n, k, -1) + if lwork == -1 { + work[0] = f64(n * nb) + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + if tau.len < k { + panic(short_tau) + } + + mut nbmin := 2 + mut nx := 0 + mut ldwork := 0 + mut iws := n + if 1 < nb && nb < k { + // Determine when to cross over from blocked to unblocked code. + nx = math.max(0, ilaenv(3, 'DORGQL', ' ', m, n, k, -1)) + if nx < k { + // Determine if workspace is large enough for blocked code. + iws = n * nb + if lwork < iws { + // Not enough workspace to use optimal nb: reduce nb and determine + // the minimum value of nb. + nb = lwork / n + nbmin = math.max(2, ilaenv(2, 'DORGQL', ' ', m, n, k, -1)) + } + ldwork = nb + } + } + + mut kk := 0 + if nbmin <= nb && nb < k && nx < k { + // Use blocked code after the first block. The last kk columns are handled + // by the block method. + kk = math.min(k, ((k - nx + nb - 1) / nb) * nb) + + // Set A(m-kk:m, 0:n-kk) to zero. + for i := m - kk; i < m; i++ { + for j := 0; j < n - kk; j++ { + a[i * lda + j] = 0 + } + } + } + + // Use unblocked code for the first or only block. + dorg2l(m - kk, n - kk, k - kk, mut a, lda, tau, mut work) + if kk > 0 { + // Use blocked code. + for i := k - kk; i < k; i += nb { + ib := math.min(nb, k - i) + if n - k + i > 0 { + // Form the triangular factor of the block reflector + // H = H_{i+ib-1} * ... * H_{i+1} * H_i. 
+ dlarft(.backward, .column_wise, m - k + i + ib, ib, mut a[n - k + i..], + lda, tau[i..], mut work, ldwork) + + // Apply H to A[0:m-k+i+ib, 0:n-k+i] from the left. + dlarfb(.left, .no_trans, .backward, .column_wise, m - k + i + ib, n - k + i, + ib, a[n - k + i..], lda, work, ldwork, mut a, lda, mut work[ib * ldwork..], + ldwork) + } + + // Apply H to rows 0:m-k+i+ib of current block. + dorg2l(m - k + i + ib, ib, ib, mut a[n - k + i..], lda, tau[i..], mut work) + + // Set rows m-k+i+ib:m of current block to zero. + for j := n - k + i; j < n - k + i + ib; j++ { + for l := m - k + i + ib; l < m; l++ { + a[l * lda + j] = 0 + } + } + } + } + work[0] = f64(iws) +} diff --git a/lapack/lapack64/dorgtr.v b/lapack/lapack64/dorgtr.v new file mode 100644 index 000000000..bf3829715 --- /dev/null +++ b/lapack/lapack64/dorgtr.v @@ -0,0 +1,107 @@ +module lapack64 + +import math +import vsl.blas + +// dorgtr generates a real orthogonal matrix Q which is defined as the product +// of n-1 elementary reflectors of order n as returned by dsytrd. +// +// The construction of Q depends on the value of uplo: +// +// Q = H_{n-1} * ... * H_1 * H_0 if uplo == blas.Upper +// Q = H_0 * H_1 * ... * H_{n-1} if uplo == blas.Lower +// +// where H_i is constructed from the elementary reflectors as computed by dsytrd. +// See the documentation for dsytrd for more information. +// +// tau must have length at least n-1, and dorgtr will panic otherwise. +// +// work is temporary storage, and lwork specifies the usable memory length. At +// minimum, lwork >= max(1,n-1), and dorgtr will panic otherwise. The amount of blocking +// is limited by the usable length. +// If lwork == -1, instead of computing dorgtr the optimal work length is stored +// into work[0]. +// +// dorgtr is an internal routine. It is exported for testing purposes. 
+pub fn dorgtr(uplo blas.Uplo, n int, mut a []f64, lda int, tau []f64, mut work []f64, lwork int) { + if uplo != .upper && uplo != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if lwork < math.max(1, n - 1) && lwork != -1 { + panic(bad_l_work) + } + if work.len < math.max(1, lwork) { + panic(short_work) + } + + if n == 0 { + work[0] = 1 + return + } + + mut nb := 0 + if uplo == .upper { + nb = ilaenv(1, 'DORGQL', ' ', n - 1, n - 1, n - 1, -1) + } else { + nb = ilaenv(1, 'DORGQR', ' ', n - 1, n - 1, n - 1, -1) + } + lworkopt := math.max(1, n - 1) * nb + if lwork == -1 { + work[0] = f64(lworkopt) + return + } + + if a.len < (n - 1) * lda + n { + panic(short_a) + } + if tau.len < n - 1 { + panic(short_tau) + } + + if uplo == .upper { + // Q was determined by a call to dsytrd with uplo == blas.Upper. + // Shift the vectors which define the elementary reflectors one column + // to the left, and set the last row and column of Q to those of the unit + // matrix. + for j := 0; j < n - 1; j++ { + for i := 0; i < j; i++ { + a[i * lda + j] = unsafe { a[i * lda + j + 1] } + } + a[(n - 1) * lda + j] = 0 + } + for i := 0; i < n - 1; i++ { + a[i * lda + n - 1] = 0 + } + a[(n - 1) * lda + n - 1] = 1 + + // Generate Q[0:n-1, 0:n-1]. + dorgql(n - 1, n - 1, n - 1, mut a, lda, tau, mut work, lwork) + } else { + // Q was determined by a call to dsytrd with uplo == blas.Lower. + // Shift the vectors which define the elementary reflectors one column + // to the right, and set the first row and column of Q to those of the unit + // matrix. + for j := n - 1; j > 0; j-- { + a[j] = 0 + for i := j + 1; i < n; i++ { + a[i * lda + j] = unsafe { a[i * lda + j - 1] } + } + } + a[0] = 1 + for i := 1; i < n; i++ { + a[i * lda] = 0 + } + if n > 1 { + mut a_sub := unsafe { a[lda + 1..] } + // Generate Q[1:n, 1:n]. 
+ dorgqr(n - 1, n - 1, n - 1, mut a_sub, lda, tau[..n - 1], mut work, lwork) + } + } + work[0] = f64(lworkopt) +} diff --git a/lapack/lapack64/dsterf.v b/lapack/lapack64/dsterf.v new file mode 100644 index 000000000..c0db958aa --- /dev/null +++ b/lapack/lapack64/dsterf.v @@ -0,0 +1,280 @@ +module lapack64 + +import math + +// dsterf computes all eigenvalues of a symmetric tridiagonal matrix using the +// Pal-Walker-Kahan variant of the QL or QR algorithm. +// +// d contains the diagonal elements of the tridiagonal matrix on entry, and +// contains the eigenvalues in ascending order on exit. d must have length at +// least n, or dsterf will panic. +// +// e contains the off-diagonal elements of the tridiagonal matrix on entry, and is +// overwritten during the call to dsterf. e must have length of at least n-1 or +// dsterf will panic. +// +// dsterf is an internal routine. It is exported for testing purposes. +pub fn dsterf(n int, mut d []f64, mut e []f64) bool { + if n < 0 { + panic(n_lt0) + } + + // Quick return if possible. + if n == 0 { + return true + } + + if d.len < n { + panic(short_d) + } + if e.len < n - 1 { + panic(short_e) + } + + if n == 1 { + return true + } + + none_scaled := 0 // The values are not scaled. + down := 1 // The values are scaled below ssfmax threshold. + up := 2 // The values are scaled below ssfmin threshold. + + // Determine the unit roundoff for this environment. + eps := dlamch_e + eps2 := eps * eps + safmin := dlamch_s + safmax := 1.0 / safmin + ssfmax := math.sqrt(safmax) / 3.0 + ssfmin := math.sqrt(safmin) / eps2 + + // Compute the eigenvalues of the tridiagonal matrix. 
+ maxit := 30 + nmaxit := n * maxit + mut jtot := 0 + + mut l1 := 0 + + for { + if l1 > n - 1 { + dlasrt(.sort_increasing, n, mut d) + return true + } + if l1 > 0 { + e[l1 - 1] = 0 + } + mut m := 0 + for m = l1; m < n - 1; m++ { + if math.abs(e[m]) <= math.sqrt(math.abs(d[m])) * math.sqrt(math.abs(d[m + 1])) * eps { + e[m] = 0 + break + } + } + + mut l := l1 + lsv := l + mut lend := m + lendsv := lend + l1 = m + 1 + if lend == 0 { + continue + } + + // Scale submatrix in rows and columns l to lend. + anorm := dlanst(.max_abs, lend - l + 1, d[l..], e[l..]) + mut iscale := none_scaled + if anorm == 0.0 { + continue + } + if anorm > ssfmax { + iscale = down + dlascl(.general, 0, 0, anorm, ssfmax, lend - l + 1, 1, mut d[l..], n) + dlascl(.general, 0, 0, anorm, ssfmax, lend - l, 1, mut e[l..], n) + } else if anorm < ssfmin { + iscale = up + dlascl(.general, 0, 0, anorm, ssfmin, lend - l + 1, 1, mut d[l..], n) + dlascl(.general, 0, 0, anorm, ssfmin, lend - l, 1, mut e[l..], n) + } + + mut el := unsafe { e[l..lend] } + for i, v in el { + el[i] *= v + } + + // Choose between QL and QR iteration. + if math.abs(d[lend]) < math.abs(d[l]) { + lend = lsv + l = lendsv + } + if lend >= l { + // QL Iteration. + // Look for small sub-diagonal element. + for { + if l != lend { + for m = l; m < lend; m++ { + if math.abs(e[m]) <= eps2 * (math.abs(d[m] * d[m + 1])) { + break + } + } + } else { + m = lend + } + if m < lend { + e[m] = 0 + } + mut p := d[l] + if m == l { + // Eigenvalue found. + l++ + if l > lend { + break + } + continue + } + // If remaining matrix is 2 by 2, use Dlae2 to compute its eigenvalues. + if m == l + 1 { + d[l], d[l + 1] = dlae2(d[l], math.sqrt(e[l]), d[l + 1]) + e[l] = 0 + l += 2 + if l > lend { + break + } + continue + } + if jtot == nmaxit { + break + } + jtot++ + + // Form shift. 
+ rte := math.sqrt(e[l]) + mut sigma := (d[l + 1] - p) / (2.0 * rte) + r := dlapy2(sigma, 1.0) + sigma = p - (rte / (sigma + math.copysign(r, sigma))) + + mut c := 1.0 + mut s := 0.0 + mut gamma := d[m] - sigma + p = gamma * gamma + + // Inner loop. + for i := m - 1; i >= l; i-- { + bb := e[i] + r_ := p + bb + if i != m - 1 { + e[i + 1] = s * r_ + } + oldc := c + c = p / r_ + s = bb / r_ + oldgam := gamma + alpha := d[i] + gamma = c * (alpha - sigma) - s * oldgam + d[i + 1] = oldgam + (alpha - gamma) + if c != 0.0 { + p = (gamma * gamma) / c + } else { + p = oldc * bb + } + } + e[l] = s * p + d[l] = sigma + gamma + } + } else { + for { + // QR Iteration. + // Look for small super-diagonal element. + for m = l; m > lend; m-- { + if math.abs(e[m - 1]) <= eps2 * math.abs(d[m] * d[m - 1]) { + break + } + } + if m > lend { + e[m - 1] = 0 + } + mut p := d[l] + if m == l { + // Eigenvalue found. + l-- + if l < lend { + break + } + continue + } + + // If remaining matrix is 2 by 2, use Dlae2 to compute its eigenvalues. + if m == l - 1 { + d[l], d[l - 1] = dlae2(d[l], math.sqrt(e[l - 1]), d[l - 1]) + e[l - 1] = 0 + l -= 2 + if l < lend { + break + } + continue + } + if jtot == nmaxit { + break + } + jtot++ + + // Form shift. + rte := math.sqrt(e[l - 1]) + mut sigma := (d[l - 1] - p) / (2.0 * rte) + r := dlapy2(sigma, 1.0) + sigma = p - (rte / (sigma + math.copysign(r, sigma))) + + mut c := 1.0 + mut s := 0.0 + mut gamma := d[m] - sigma + p = gamma * gamma + + // Inner loop. 
+ for i := m; i < l; i++ { + bb := e[i] + r_ := p + bb + if i != m { + e[i - 1] = s * r_ + } + oldc := c + c = p / r_ + s = bb / r_ + oldgam := gamma + alpha := d[i + 1] + gamma = c * (alpha - sigma) - s * oldgam + d[i] = oldgam + alpha - gamma + if c != 0.0 { + p = (gamma * gamma) / c + } else { + p = oldc * bb + } + } + e[l - 1] = s * p + d[l] = sigma + gamma + } + } + + // Undo scaling if necessary + match iscale { + down { + dlascl(.general, 0, 0, ssfmax, anorm, lendsv - lsv + 1, 1, mut d[lsv..], + n) + } + up { + dlascl(.general, 0, 0, ssfmin, anorm, lendsv - lsv + 1, 1, mut d[lsv..], + n) + } + else {} + } + + // Check for no convergence to an eigenvalue after a total of n*maxit iterations. + if jtot >= nmaxit { + break + } + } + for v in e[0..n - 1] { + if v != 0.0 { + return false + } + } + dlasrt(.sort_increasing, n, mut d) + return true +} diff --git a/lapack/lapack64/dsytrd.v b/lapack/lapack64/dsytrd.v new file mode 100644 index 000000000..e68f16259 --- /dev/null +++ b/lapack/lapack64/dsytrd.v @@ -0,0 +1,185 @@ +module lapack64 + +import math +import vsl.blas + +// dsytrd reduces a symmetric n×n matrix A to symmetric tridiagonal form by an +// orthogonal similarity transformation +// +// Qᵀ * A * Q = T +// +// where Q is an orthonormal matrix and T is symmetric and tridiagonal. +// +// On entry, a contains the elements of the input matrix in the triangle specified +// by uplo. On exit, the diagonal and sub/super-diagonal are overwritten by the +// corresponding elements of the tridiagonal matrix T. The remaining elements in +// the triangle, along with the array tau, contain the data to construct Q as +// the product of elementary reflectors. +// +// If uplo == blas.upper, Q is constructed with +// +// Q = H_{n-2} * ... * H_1 * H_0 +// +// where +// +// H_i = I - tau_i * v * vᵀ +// +// v is constructed as v[i+1:n] = 0, v[i] = 1, v[0:i-1] is stored in A[0:i-1, i+1]. 
+// The elements of A are +// +// [ d e v1 v2 v3] +// [ d e v2 v3] +// [ d e v3] +// [ d e] +// [ e] +// +// If uplo == blas.lower, Q is constructed with +// +// Q = H_0 * H_1 * ... * H_{n-2} +// +// where +// +// H_i = I - tau_i * v * vᵀ +// +// v is constructed as v[0:i+1] = 0, v[i+1] = 1, v[i+2:n] is stored in A[i+2:n, i]. +// The elements of A are +// +// [ d ] +// [ e d ] +// [v0 e d ] +// [v0 v1 e d ] +// [v0 v1 v2 e d] +// +// d must have length n, and e and tau must have length n-1. dsytrd will panic if +// these conditions are not met. +// +// work is temporary storage, and lwork specifies the usable memory length. At minimum, +// lwork >= 1, and dsytrd will panic otherwise. The amount of blocking is +// limited by the usable length. +// If lwork == -1, instead of computing dsytrd the optimal work length is stored +// into work[0]. +// +// dsytrd is an internal routine. It is exported for testing purposes. +pub fn dsytrd(uplo blas.Uplo, n int, mut a []f64, lda int, mut d []f64, mut e []f64, mut tau []f64, mut work []f64, lwork int) { + if uplo != .upper && uplo != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if lwork < 1 && lwork != -1 { + panic(bad_l_work) + } + if work.len < math.max(1, lwork) { + panic(short_work) + } + + // Quick return if possible. + if n == 0 { + work[0] = 1 + return + } + + mut nb := ilaenv(1, 'DSYTRD', if uplo == .upper { 'U' } else { 'L' }, n, -1, -1, -1) + lworkopt := n * nb + if lwork == -1 { + work[0] = f64(lworkopt) + return + } + + if a.len < (n - 1) * lda + n { + panic(short_a) + } + if d.len < n { + panic(short_d) + } + if e.len < n - 1 { + panic(short_e) + } + if tau.len < n - 1 { + panic(short_tau) + } + + mut nx := n + mut iws := 1 + mut ldwork := 0 + if 1 < nb && nb < n { + // Determine when to cross over from blocked to unblocked code. The last + // block is always handled by unblocked code. 
+		nx = math.max(nb, ilaenv(3, 'DSYTRD', if uplo == .upper { 'U' } else { 'L' },
+			n, -1, -1, -1))
+		if nx < n {
+			// Determine if workspace is large enough for blocked code.
+			ldwork = nb
+			iws = n * ldwork
+			if lwork < iws {
+				// Not enough workspace to use optimal nb: determine the minimum
+				// value of nb and reduce nb or force use of unblocked code by
+				// setting nx = n.
+				nb = math.max(lwork / n, 1)
+				nbmin := ilaenv(2, 'DSYTRD', if uplo == .upper { 'U' } else { 'L' }, n,
+					-1, -1, -1)
+				if nb < nbmin {
+					nx = n
+				}
+			}
+		} else {
+			nx = n
+		}
+	} else {
+		nb = 1
+	}
+	ldwork = nb
+
+	if uplo == .upper {
+		// Reduce the upper triangle of A. Columns 0:kk are handled by the
+		// unblocked method.
+		mut i := 0
+		kk := n - ((n - nx + nb - 1) / nb) * nb
+		for i = n - nb; i >= kk; i -= nb {
+			// Reduce columns i:i+nb to tridiagonal form and form the matrix W
+			// which is needed to update the unreduced part of the matrix.
+			dlatrd(uplo, i + nb, nb, mut a, lda, mut e, mut tau, mut work, ldwork)
+
+			// Update the unreduced submatrix A[0:i-1,0:i-1], using an update
+			// of the form A = A - V*Wᵀ - W*Vᵀ.
+			// NOTE(fix): in this row-major upper-triangular storage the
+			// reflector block V occupies rows 0:i of columns i:i+nb, so it
+			// starts at a[i] with stride lda — not at row i (a[i*lda]).
+			// Cf. reference implementations: Dsyr2k(..., a[i:], lda, ...).
+			blas.dsyr2k(uplo, .no_trans, i, nb, -1.0, a[i..], lda, work, ldwork,
+				1.0, mut a, lda)
+
+			// Copy superdiagonal elements back into A, and diagonal elements into D.
+			for j := i; j < i + nb; j++ {
+				a[(j - 1) * lda + j] = e[j - 1]
+				d[j] = a[j * lda + j]
+			}
+		}
+		// Use unblocked code to reduce the last or only block
+		dsytd2(uplo, kk, mut a, lda, mut d, mut e, mut tau)
+	} else {
+		mut i := 0
+		// Reduce the lower triangle of A.
+		for i = 0; i < n - nx; i += nb {
+			// Reduce columns 0:i+nb to tridiagonal form and form the matrix W
+			// which is needed to update the unreduced part of the matrix.
+			dlatrd(uplo, n - i, nb, mut a[i * lda + i..], lda, mut e[i..], mut tau[i..], mut
+				work, ldwork)
+
+			// Update the unreduced submatrix A[i+ib:n, i+ib:n], using an update
+			// of the form A = A + V*Wᵀ - W*Vᵀ.
+ blas.dsyr2k(uplo, .no_trans, n - i - nb, nb, -1.0, a[(i + nb) * lda + i..], + lda, work[nb * ldwork..], ldwork, 1.0, mut a[(i + nb) * lda + i + nb..], + lda) + + // Copy subdiagonal elements back into A, and diagonal elements into D. + for j := i; j < i + nb; j++ { + a[(j + 1) * lda + j] = e[j] + d[j] = a[j * lda + j] + } + } + // Use unblocked code to reduce the last or only block. + dsytd2(uplo, n - i, mut a[i * lda + i..], lda, mut d[i..], mut e[i..], mut tau[i..]) + } + work[0] = f64(iws) +} diff --git a/lapack/lapack_notd_vsl_lapack_lapacke.v b/lapack/lapack_notd_vsl_lapack_lapacke.v index 7719a8c53..e16020046 100644 --- a/lapack/lapack_notd_vsl_lapack_lapacke.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -111,8 +111,8 @@ pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { // where U is an upper triangular matrix and L is lower triangular. // // This is the block version of the algorithm, calling Level 3 BLAS. -pub fn dpotrf(uplo bool, n int, mut a []f64, lda int) { - info := lapack64.dpotrf(blas.uplo_from_bool(uplo), n, mut a, lda) +pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { + info := lapack64.dpotrf(uplo, n, mut a, lda) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } From 5f9a1fe0e5c16c0b2602805a7312691e2de105a6 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 03:59:36 -0300 Subject: [PATCH 27/33] refactor: Update conversions.v, dgetf2.v, dsyev.v, and lapack_notd_vsl_lapack_lapacke.v --- blas/blas64/conversions.v | 1 + lapack/lapack64/dlaev2.v | 73 ++++++++ lapack/lapack64/dlarf.v | 92 ++++++++++ lapack/lapack64/dlarfb.v | 259 ++++++++++++++++++++++++++ lapack/lapack64/dlarfg.v | 51 ++++++ lapack/lapack64/dlarft.v | 155 ++++++++++++++++ lapack/lapack64/dlartg.v | 56 ++++++ lapack/lapack64/dlaset.v | 59 ++++++ lapack/lapack64/dlasr.v | 277 ++++++++++++++++++++++++++++ lapack/lapack64/dlatrd.v | 108 +++++++++++ lapack/lapack64/dorg2l.v | 2 +- lapack/lapack64/dorg2r.v | 66 +++++++ 
lapack/lapack64/dorgql.v | 4 +- lapack/lapack64/dorgqr.v | 104 +++++++++++ lapack/lapack64/dsteqr.v | 375 ++++++++++++++++++++++++++++++++++++++ lapack/lapack64/dsyev.v | 4 +- lapack/lapack64/dsytd2.v | 144 +++++++++++++++ lapack/lapack64/errors.v | 6 +- lapack/lapack64/iladlc.v | 42 +++++ lapack/lapack64/iladlr.v | 38 ++++ 20 files changed, 1908 insertions(+), 8 deletions(-) create mode 100644 lapack/lapack64/dlaev2.v create mode 100644 lapack/lapack64/dlarf.v create mode 100644 lapack/lapack64/dlarfb.v create mode 100644 lapack/lapack64/dlarfg.v create mode 100644 lapack/lapack64/dlarft.v create mode 100644 lapack/lapack64/dlartg.v create mode 100644 lapack/lapack64/dlaset.v create mode 100644 lapack/lapack64/dlasr.v create mode 100644 lapack/lapack64/dlatrd.v create mode 100644 lapack/lapack64/dorg2r.v create mode 100644 lapack/lapack64/dorgqr.v create mode 100644 lapack/lapack64/dsteqr.v create mode 100644 lapack/lapack64/dsytd2.v create mode 100644 lapack/lapack64/iladlc.v create mode 100644 lapack/lapack64/iladlr.v diff --git a/blas/blas64/conversions.v b/blas/blas64/conversions.v index 6688d5887..cf0b75a6e 100644 --- a/blas/blas64/conversions.v +++ b/blas/blas64/conversions.v @@ -18,6 +18,7 @@ pub enum Transpose { pub enum Uplo { upper = 121 lower = 122 + all = 99 } // Diagonal is used to specify whether the diagonal of a matrix is unit or non-unit. 
diff --git a/lapack/lapack64/dlaev2.v b/lapack/lapack64/dlaev2.v new file mode 100644 index 000000000..cf6fcc011 --- /dev/null +++ b/lapack/lapack64/dlaev2.v @@ -0,0 +1,73 @@ +module lapack64 + +import math + +pub fn dlaev2(a f64, b f64, c f64) (f64, f64, f64, f64) { + sm := a + c + df := a - c + adf := math.abs(df) + tb := b + b + ab := math.abs(tb) + mut acmx := c + mut acmn := a + if math.abs(a) > math.abs(c) { + acmx = a + acmn = c + } + mut rt := 0.0 + if adf > ab { + rt = adf * math.sqrt(1 + (ab / adf) * (ab / adf)) + } else if adf < ab { + rt = ab * math.sqrt(1 + (adf / ab) * (adf / ab)) + } else { + rt = ab * math.sqrt(2) + } + mut rt1 := 0.0 + mut rt2 := 0.0 + mut cs1 := 0.0 + mut sn1 := 0.0 + mut sgn1 := 0.0 + if sm < 0 { + rt1 = 0.5 * (sm - rt) + sgn1 = -1 + rt2 = (acmx / rt1) * acmn - (b / rt1) * b + } else if sm > 0 { + rt1 = 0.5 * (sm + rt) + sgn1 = 1 + rt2 = (acmx / rt1) * acmn - (b / rt1) * b + } else { + rt1 = 0.5 * rt + rt2 = -0.5 * rt + sgn1 = 1 + } + mut cs := 0.0 + mut sgn2 := 0.0 + if df >= 0 { + cs = df + rt + sgn2 = 1 + } else { + cs = df - rt + sgn2 = -1 + } + acs := math.abs(cs) + if acs > ab { + ct := -tb / cs + sn1 = 1 / math.sqrt(1 + ct * ct) + cs1 = ct * sn1 + } else { + if ab == 0 { + cs1 = 1 + sn1 = 0 + } else { + tn := -cs / tb + cs1 = 1 / math.sqrt(1 + tn * tn) + sn1 = tn * cs1 + } + } + if sgn1 == sgn2 { + tn := cs1 + cs1 = -sn1 + sn1 = tn + } + return rt1, rt2, cs1, sn1 +} diff --git a/lapack/lapack64/dlarf.v b/lapack/lapack64/dlarf.v new file mode 100644 index 000000000..217279980 --- /dev/null +++ b/lapack/lapack64/dlarf.v @@ -0,0 +1,92 @@ +module lapack64 + +import math +import vsl.blas + +// dlarf applies an elementary reflector H to an m×n matrix C: +// +// C = H * C if side == .left +// C = C * H if side == .right +// +// H is represented in the form +// +// H = I - tau * v * vᵀ +// +// where tau is a scalar and v is a vector. +// +// work must have length at least m if side == .left and +// at least n if side == .right. 
+// +// dlarf is an internal routine. It is exported for testing purposes. +pub fn dlarf(side blas.Side, m int, n int, v []f64, incv int, tau f64, mut c []f64, ldc int, mut work []f64) { + if side != .left && side != .right { + panic(bad_side) + } + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if incv == 0 { + panic(zero_inc_v) + } + if ldc < math.max(1, n) { + panic(bad_ld_c) + } + + if m == 0 || n == 0 { + return + } + + applyleft := side == .left + len_v := if applyleft { m } else { n } + + if v.len < 1 + (len_v - 1) * math.abs(incv) { + panic(short_v) + } + if c.len < (m - 1) * ldc + n { + panic(short_c) + } + if (applyleft && work.len < n) || (!applyleft && work.len < m) { + panic(short_work) + } + + mut lastv := -1 // last non-zero element of v + mut lastc := -1 // last non-zero row/column of C + if tau != 0 { + lastv = if applyleft { m - 1 } else { n - 1 } + mut i := if incv > 0 { lastv * incv } else { 0 } + // Look for the last non-zero row in v. + for lastv >= 0 && v[i] == 0 { + lastv-- + i -= incv + } + if applyleft { + // Scan for the last non-zero column in C[0:lastv, :] + lastc = iladlc(lastv + 1, n, c, ldc) + } else { + // Scan for the last non-zero row in C[:, 0:lastv] + lastc = iladlr(m, lastv + 1, c, ldc) + } + } + if lastv == -1 || lastc == -1 { + return + } + + if applyleft { + // Form H * C + // w[0:lastc+1] = c[1:lastv+1, 1:lastc+1]ᵀ * v[1:lastv+1,1] + blas.dgemv(.trans, lastv + 1, lastc + 1, 1.0, c, ldc, v, incv, 0.0, mut work, + 1) + // c[0: lastv, 0: lastc] = c[...] - w[0:lastv, 1] * v[1:lastc, 1]ᵀ + blas.dger(lastv + 1, lastc + 1, -tau, v, incv, work, 1, mut c, ldc) + } else { + // Form C * H + // w[0:lastc+1,1] := c[0:lastc+1,0:lastv+1] * v[0:lastv+1,1] + blas.dgemv(.no_trans, lastc + 1, lastv + 1, 1.0, c, ldc, v, incv, 0.0, mut work, + 1) + // c[0:lastc+1,0:lastv+1] = c[...] 
- w[0:lastc+1,0] * v[0:lastv+1,0]ᵀ + blas.dger(lastc + 1, lastv + 1, -tau, work, 1, v, incv, mut c, ldc) + } +} diff --git a/lapack/lapack64/dlarfb.v b/lapack/lapack64/dlarfb.v new file mode 100644 index 000000000..932acf752 --- /dev/null +++ b/lapack/lapack64/dlarfb.v @@ -0,0 +1,259 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dlarfb(side blas.Side, trans blas.Transpose, direct Direct, store StoreV, m int, n int, k int, v []f64, ldv int, t []f64, ldt int, mut c []f64, ldc int, mut work []f64, ldwork int) { + if side != .left && side != .right { + panic(bad_side) + } + if trans != .trans && trans != .no_trans { + panic(bad_trans) + } + if direct != .forward && direct != .backward { + panic(bad_direct) + } + if store != .column_wise && store != .row_wise { + panic(bad_store_v) + } + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if k < 0 { + panic(k_lt0) + } + if store == .column_wise && ldv < math.max(1, k) { + panic(bad_ld_v) + } + if store == .row_wise && ldv < math.max(1, m) { + panic(bad_ld_v) + } + if ldt < math.max(1, k) { + panic(bad_ld_t) + } + if ldc < math.max(1, n) { + panic(bad_ld_c) + } + if ldwork < math.max(1, k) { + panic(bad_ld_work) + } + + if m == 0 || n == 0 { + return + } + + mut nv := m + if side == .right { + nv = n + } + if store == .column_wise && v.len < (nv - 1) * ldv + k { + panic(short_v) + } + if store == .row_wise && v.len < (k - 1) * ldv + nv { + panic(short_v) + } + if t.len < (k - 1) * ldt + k { + panic(short_t) + } + if c.len < (m - 1) * ldc + n { + panic(short_c) + } + if work.len < (nv - 1) * ldwork + k { + panic(short_work) + } + + transt := if trans == .trans { blas.Transpose.no_trans } else { blas.Transpose.trans } + + if store == .column_wise { + if direct == .forward { + if side == .left { + for j := 0; j < k; j++ { + blas.dcopy(n, c[j * ldc..], 1, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .lower, .no_trans, .unit, n, k, 1.0, v, ldv, mut work, + ldwork) + if m > k { + 
blas.dgemm(.trans, .no_trans, n, k, m - k, 1.0, c[k * ldc..], ldc, + v[k * ldv..], ldv, 1.0, mut work, ldwork) + } + blas.dtrmm(.right, .upper, transt, .non_unit, n, k, 1.0, t, ldt, mut work, + ldwork) + if m > k { + blas.dgemm(.no_trans, .trans, m - k, n, k, -1.0, v[k * ldv..], ldv, + work, ldwork, 1.0, mut c[k * ldc..], ldc) + } + blas.dtrmm(.right, .lower, .trans, .unit, n, k, 1.0, v, ldv, mut work, + ldwork) + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[j * ldc + i] -= unsafe { work[i * ldwork + j] } + } + } + return + } + for j := 0; j < k; j++ { + blas.dcopy(m, c[j..], ldc, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .lower, .no_trans, .unit, m, k, 1.0, v, ldv, mut work, + ldwork) + if n > k { + blas.dgemm(.no_trans, .no_trans, m, k, n - k, 1.0, c[k..], ldc, v[k * ldv..], + ldv, 1.0, mut work, ldwork) + } + blas.dtrmm(.right, .upper, trans, .non_unit, m, k, 1.0, t, ldt, mut work, + ldwork) + if n > k { + blas.dgemm(.no_trans, .no_trans, m, n - k, k, -1.0, work, ldwork, v[k * ldv..], + ldv, 1.0, mut c[k..], ldc) + } + blas.dtrmm(.right, .lower, .trans, .unit, m, k, 1.0, v, ldv, mut work, ldwork) + for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i * ldc + j] -= unsafe { work[i * ldwork + j] } + } + } + return + } + if side == .left { + for j := 0; j < k; j++ { + blas.dcopy(n, c[(m - k + j) * ldc..], 1, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .upper, .no_trans, .unit, n, k, 1.0, v[(m - k) * ldv..], + ldv, mut work, ldwork) + if m > k { + blas.dgemm(.trans, .no_trans, n, k, m - k, 1.0, c, ldc, v, ldv, 1.0, mut + work, ldwork) + } + blas.dtrmm(.right, .lower, transt, .non_unit, n, k, 1.0, t, ldt, mut work, + ldwork) + if m > k { + blas.dgemm(.no_trans, .trans, m - k, n, k, -1.0, v, ldv, work, ldwork, + 1.0, mut c, ldc) + } + blas.dtrmm(.right, .upper, .trans, .unit, n, k, 1.0, v[(m - k) * ldv..], ldv, mut + work, ldwork) + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[(m - k + j) * ldc + i] -= unsafe { work[i * 
ldwork + j] } + } + } + return + } + for j := 0; j < k; j++ { + blas.dcopy(m, c[(n - k + j)..], ldc, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .upper, .no_trans, .unit, m, k, 1.0, v[(n - k) * ldv..], ldv, mut + work, ldwork) + if n > k { + blas.dgemm(.no_trans, .no_trans, m, k, n - k, 1.0, c, ldc, v, ldv, 1.0, mut + work, ldwork) + } + blas.dtrmm(.right, .lower, trans, .non_unit, m, k, 1.0, t, ldt, mut work, ldwork) + if n > k { + blas.dgemm(.no_trans, .trans, m, n - k, k, -1.0, work, ldwork, v, ldv, 1.0, mut + c, ldc) + } + blas.dtrmm(.right, .upper, .trans, .unit, m, k, 1.0, v[(n - k) * ldv..], ldv, mut + work, ldwork) + for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i * ldc + (n - k + j)] -= unsafe { work[i * ldwork + j] } + } + } + return + } + if direct == .forward { + if side == .left { + for j := 0; j < k; j++ { + blas.dcopy(n, c[j * ldc..], 1, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .upper, .trans, .unit, n, k, 1.0, v, ldv, mut work, ldwork) + if m > k { + blas.dgemm(.trans, .trans, n, k, m - k, 1.0, c[k * ldc..], ldc, v[k..], + ldv, 1.0, mut work, ldwork) + } + blas.dtrmm(.right, .upper, transt, .non_unit, n, k, 1.0, t, ldt, mut work, + ldwork) + if m > k { + blas.dgemm(.trans, .trans, m - k, n, k, -1.0, v[k..], ldv, work, ldwork, + 1.0, mut c[k * ldc..], ldc) + } + blas.dtrmm(.right, .upper, .no_trans, .unit, n, k, 1.0, v, ldv, mut work, + ldwork) + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[j * ldc + i] -= unsafe { work[i * ldwork + j] } + } + } + return + } + for j := 0; j < k; j++ { + blas.dcopy(m, c[j..], ldc, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .upper, .trans, .unit, m, k, 1.0, v, ldv, mut work, ldwork) + if n > k { + blas.dgemm(.no_trans, .trans, m, k, n - k, 1.0, c[k..], ldc, v[k..], ldv, + 1.0, mut work, ldwork) + } + blas.dtrmm(.right, .upper, trans, .non_unit, m, k, 1.0, t, ldt, mut work, ldwork) + if n > k { + blas.dgemm(.no_trans, .trans, m, n - k, k, -1.0, work, ldwork, v[k..], ldv, 
+ 1.0, mut c[k..], ldc) + } + blas.dtrmm(.right, .upper, .no_trans, .unit, m, k, 1.0, v, ldv, mut work, ldwork) + for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i * ldc + j] -= unsafe { work[i * ldwork + j] } + } + } + return + } + if side == .left { + for j := 0; j < k; j++ { + blas.dcopy(n, c[(m - k + j) * ldc..], 1, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .lower, .no_trans, .unit, n, k, 1.0, v[(m - k)..], ldv, mut + work, ldwork) + if m > k { + blas.dgemm(.trans, .no_trans, n, k, m - k, 1.0, c, ldc, v, ldv, 1.0, mut work, + ldwork) + } + blas.dtrmm(.right, .lower, transt, .non_unit, n, k, 1.0, t, ldt, mut work, ldwork) + if m > k { + blas.dgemm(.no_trans, .trans, m - k, n, k, -1.0, v, ldv, work, ldwork, 1.0, mut + c, ldc) + } + blas.dtrmm(.right, .lower, .trans, .unit, n, k, 1.0, v[(m - k)..], ldv, mut work, + ldwork) + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[(m - k + j) * ldc + i] -= unsafe { work[i * ldwork + j] } + } + } + return + } + for j := 0; j < k; j++ { + blas.dcopy(m, c[(n - k + j)..], ldc, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .lower, .no_trans, .unit, m, k, 1.0, v[(n - k)..], ldv, mut work, + ldwork) + if n > k { + blas.dgemm(.no_trans, .trans, m, k, n - k, 1.0, c, ldc, v, ldv, 1.0, mut work, + ldwork) + } + blas.dtrmm(.right, .lower, trans, .non_unit, m, k, 1.0, t, ldt, mut work, ldwork) + if n > k { + blas.dgemm(.no_trans, .no_trans, m, n - k, k, -1.0, work, ldwork, v, ldv, 1.0, mut + c, ldc) + } + blas.dtrmm(.right, .lower, .trans, .unit, m, k, 1.0, v[(n - k)..], ldv, mut work, + ldwork) + for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i * ldc + (n - k + j)] -= unsafe { work[i * ldwork + j] } + } + } +} diff --git a/lapack/lapack64/dlarfg.v b/lapack/lapack64/dlarfg.v new file mode 100644 index 000000000..d870c3691 --- /dev/null +++ b/lapack/lapack64/dlarfg.v @@ -0,0 +1,51 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dlarfg(n int, alpha f64, mut x []f64, incx int) (f64, 
f64) { + if n < 0 { + panic(n_lt0) + } + if incx <= 0 { + panic(bad_inc_x) + } + + if n <= 1 { + return alpha, 0 + } + + if x.len < 1 + (n - 2) * math.abs(incx) { + panic(short_x) + } + + mut xnorm := blas.dnrm2(n - 1, x, incx) + if xnorm == 0 { + return alpha, 0 + } + mut beta := -math.copysign(dlapy2(alpha, xnorm), alpha) + safmin := dlamch_s / dlamch_e + mut knt := 0 + mut alpha_ := alpha + if math.abs(beta) < safmin { + // xnorm and beta may be inaccurate, scale x and recompute. + rsafmn := 1 / safmin + for { + knt++ + blas.dscal(n - 1, rsafmn, mut x, incx) + beta *= rsafmn + alpha_ *= rsafmn + if math.abs(beta) >= safmin { + break + } + } + xnorm = blas.dnrm2(n - 1, x, incx) + beta = -math.copysign(dlapy2(alpha_, xnorm), alpha_) + } + mut tau := (beta - alpha_) / beta + blas.dscal(n - 1, 1 / (alpha_ - beta), mut x, incx) + for _ in 0 .. knt { + beta *= safmin + } + return beta, tau +} diff --git a/lapack/lapack64/dlarft.v b/lapack/lapack64/dlarft.v new file mode 100644 index 000000000..e7ef8825a --- /dev/null +++ b/lapack/lapack64/dlarft.v @@ -0,0 +1,155 @@ +module lapack64 + +import math +import vsl.blas + +// dlarft forms the triangular factor T of a block reflector H, storing the answer +// in t. +// +// H = I - V * T * Vᵀ if store == .column_wise +// H = I - Vᵀ * T * V if store == .row_wise +// +// H is defined by a product of the elementary reflectors where +// +// H = H_0 * H_1 * ... * H_{k-1} if direct == .forward +// H = H_{k-1} * ... * H_1 * H_0 if direct == .backward +// +// t is a k×k triangular matrix. t is upper triangular if direct = .forward +// and lower triangular otherwise. This function will panic if t is not of +// sufficient size. +// +// store describes the storage of the elementary reflectors in v. See +// dlarfb for a description of layout. +// +// tau contains the scalar factors of the elementary reflectors H_i. +// +// dlarft is an internal routine. It is exported for testing purposes. 
+pub fn dlarft(direct Direct, store StoreV, n int, k int, v []f64, ldv int, tau []f64, mut t []f64, ldt int) { + mv, nv := if store == .row_wise { k, n } else { n, k } + if direct != .forward && direct != .backward { + panic(bad_direct) + } + if store != .row_wise && store != .column_wise { + panic(bad_store_v) + } + if n < 0 { + panic(n_lt0) + } + if k < 1 { + panic(k_lt1) + } + if ldv < math.max(1, nv) { + panic(bad_ld_v) + } + if tau.len < k { + panic(short_tau) + } + if ldt < math.max(1, k) { + panic(short_t) + } + + if n == 0 { + return + } + + if v.len < (mv - 1) * ldv + nv { + panic(short_v) + } + if t.len < (k - 1) * ldt + k { + panic(short_t) + } + + if direct == .forward { + mut prevlastv := n - 1 + for i := 0; i < k; i++ { + prevlastv = math.max(i, prevlastv) + if tau[i] == 0 { + for j := 0; j <= i; j++ { + t[j * ldt + i] = 0 + } + continue + } + mut lastv := 0 + if store == .column_wise { + // skip trailing zeros + for lastv = n - 1; lastv >= i + 1; lastv-- { + if v[lastv * ldv + i] != 0 { + break + } + } + for j := 0; j < i; j++ { + t[j * ldt + i] = -tau[i] * v[i * ldv + j] + } + j := math.min(lastv, prevlastv) + blas.dgemv(.trans, j - i, i, -tau[i], v[(i + 1) * ldv..], ldv, v[(i + 1) * ldv + i..], + ldv, 1.0, mut t[i..], ldt) + } else { + for lastv = n - 1; lastv >= i + 1; lastv-- { + if v[i * ldv + lastv] != 0 { + break + } + } + for j := 0; j < i; j++ { + t[j * ldt + i] = -tau[i] * v[j * ldv + i] + } + j := math.min(lastv, prevlastv) + blas.dgemv(.no_trans, i, j - i, -tau[i], v[i + 1..], ldv, v[i * ldv + i + 1..], + 1, 1.0, mut t[i..], ldt) + } + blas.dtrmv(.upper, .no_trans, .non_unit, i, t, ldt, mut t[i..], ldt) + t[i * ldt + i] = tau[i] + if i > 1 { + prevlastv = math.max(prevlastv, lastv) + } else { + prevlastv = lastv + } + } + return + } + + mut prevlastv := 0 + for i := k - 1; i >= 0; i-- { + if tau[i] == 0 { + for j := i; j < k; j++ { + t[j * ldt + i] = 0 + } + continue + } + mut lastv := 0 + if i < k - 1 { + if store == .column_wise { + for 
lastv = 0; lastv < i; lastv++ { + if v[lastv * ldv + i] != 0 { + break + } + } + for j := i + 1; j < k; j++ { + t[j * ldt + i] = -tau[i] * v[(n - k + i) * ldv + j] + } + j := math.max(lastv, prevlastv) + blas.dgemv(.trans, n - k + i - j, k - i - 1, -tau[i], v[j * ldv + i + 1..], + ldv, v[j * ldv + i..], ldv, 1.0, mut t[(i + 1) * ldt + i..], ldt) + } else { + for lastv = 0; lastv < i; lastv++ { + if v[i * ldv + lastv] != 0 { + break + } + } + for j := i + 1; j < k; j++ { + t[j * ldt + i] = -tau[i] * v[j * ldv + n - k + i] + } + j := math.max(lastv, prevlastv) + blas.dgemv(.no_trans, k - i - 1, n - k + i - j, -tau[i], v[(i + 1) * ldv + j..], + ldv, v[i * ldv + j..], 1, 1.0, mut t[(i + 1) * ldt + i..], ldt) + } + blas.dtrmv(.lower, .no_trans, .non_unit, k - i - 1, t[(i + 1) * ldt + i + 1..], + ldt, mut t[(i + 1) * ldt + i..], ldt) + if i > 0 { + prevlastv = math.min(prevlastv, lastv) + } else { + prevlastv = lastv + } + } + t[i * ldt + i] = tau[i] + } +} diff --git a/lapack/lapack64/dlartg.v b/lapack/lapack64/dlartg.v new file mode 100644 index 000000000..b4282092e --- /dev/null +++ b/lapack/lapack64/dlartg.v @@ -0,0 +1,56 @@ +module lapack64 + +import math + +// dlartg generates a plane rotation so that +// +// [ cs sn] * [f] = [r] +// [-sn cs] [g] = [0] +// +// where cs*cs + sn*sn = 1. +// +// This is a more accurate version of BLAS Drotg that uses scaling to avoid +// overflow or underflow, with the other differences that +// - cs >= 0 +// - if g = 0, then cs = 1 and sn = 0 +// - if f = 0 and g != 0, then cs = 0 and sn = sign(1,g) +// +// dlartg is an internal routine. It is exported for testing purposes. 
+pub fn dlartg(f f64, g f64) (f64, f64, f64) { + if g == 0 { + return 1, 0, f + } + + g1 := math.abs(g) + + if f == 0 { + return 0, math.copysign(1, g), g1 + } + + safmin := dlamch_s + safmax := 1 / safmin + rtmin := math.sqrt(safmin) + rtmax := math.sqrt(safmax / 2) + + f1 := math.abs(f) + + if rtmin < f1 && f1 < rtmax && rtmin < g1 && g1 < rtmax { + d := math.sqrt(f * f + g * g) + cs := f1 / d + r := math.copysign(d, f) + sn := g / r + + return cs, sn, r + } + + u := math.min(math.max(safmin, math.max(f1, g1)), safmax) + fs := f / u + gs := g / u + d := math.sqrt(fs * fs + gs * gs) + cs := math.abs(fs) / d + mut r := math.copysign(d, f) + sn := gs / r + r *= u + + return cs, sn, r +} diff --git a/lapack/lapack64/dlaset.v b/lapack/lapack64/dlaset.v new file mode 100644 index 000000000..0ad06dbc7 --- /dev/null +++ b/lapack/lapack64/dlaset.v @@ -0,0 +1,59 @@ +module lapack64 + +import vsl.blas +import math + +// dlaset sets the off-diagonal elements of A to alpha, and the diagonal +// elements to beta. If uplo == blas.upper, only the elements in the upper +// triangular part are set. If uplo == blas.lower, only the elements in the +// lower triangular part are set. If uplo is otherwise, all of the elements of A +// are set. +// +// dlaset is an internal routine. It is exported for testing purposes. +pub fn dlaset(uplo blas.Uplo, m int, n int, alpha f64, beta f64, mut a []f64, lda int) { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + minmn := math.min(m, n) + if minmn == 0 { + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + + match uplo { + .upper { + for i in 0 .. m { + for j in i + 1 .. n { + a[i * lda + j] = alpha + } + } + } + .lower { + for i in 0 .. m { + for j in 0 .. math.min(i, n) { + a[i * lda + j] = alpha + } + } + } + else { + for i in 0 .. m { + for j in 0 .. n { + a[i * lda + j] = alpha + } + } + } + } + for i in 0 .. 
minmn {
+		a[i * lda + i] = beta
+	}
+}
diff --git a/lapack/lapack64/dlasr.v b/lapack/lapack64/dlasr.v
new file mode 100644
index 000000000..8c9c6a900
--- /dev/null
+++ b/lapack/lapack64/dlasr.v
@@ -0,0 +1,277 @@
+module lapack64
+
+import math
+import vsl.blas
+
+// dlasr applies a sequence of plane rotations to the m×n matrix A. This series
+// of plane rotations is implicitly represented by a matrix P. P is multiplied
+// by a depending on the value of side -- A = P * A if side == Side.left,
+// A = A * Pᵀ if side == Side.right.
+//
+// The exact value of P depends on the value of pivot, but in all cases P is
+// implicitly represented by a series of 2×2 rotation matrices. The entries of
+// rotation matrix k are defined by s[k] and c[k]
+//
+//	R(k) = [ c[k] s[k]]
+//	       [-s[k] c[k]]
+//
+// If direct == Direct.forward, the rotation matrices are applied as
+// P = P(z-1) * ... * P(2) * P(1), while if direct == Direct.backward they are
+// applied as P = P(1) * P(2) * ... * P(z-1).
+//
+// pivot defines the mapping of the elements in R(k) to P(k).
+// If pivot == Pivot.variable, the rotation is performed for the (k, k+1) plane.
+//
+//	P(k) = [1                    ]
+//	       [    ...              ]
+//	       [         1           ]
+//	       [          c[k] s[k]  ]
+//	       [         -s[k] c[k]  ]
+//	       [                  1  ]
+//	       [                 ... ]
+//	       [                    1]
+//
+// if pivot == Pivot.top, the rotation is performed for the (1, k+1) plane,
+//
+//	P(k) = [c[k]        s[k]     ]
+//	       [     1               ]
+//	       [       ...           ]
+//	       [           1         ]
+//	       [-s[k]       c[k]     ]
+//	       [                 1   ]
+//	       [                  ...]
+//	       [                    1]
+//
+// and if pivot == Pivot.bottom, the rotation is performed for the (k, z) plane.
+//
+//	P(k) = [1                    ]
+//	       [  ...                ]
+//	       [      1              ]
+//	       [         c[k]    s[k]]
+//	       [              1      ]
+//	       [               ...   ]
+//	       [                  1  ]
+//	       [        -s[k]    c[k]]
+//
+// s and c have length m - 1 if side == Side.left, and n - 1 if side == Side.right.
+// +pub fn dlasr(side blas.Side, pivot Pivot, direct Direct, m int, n int, c []f64, s []f64, mut a []f64, lda int) { + if side != .left && side != .right { + panic(bad_side) + } + if pivot != .variable && pivot != .top && pivot != .bottom { + panic(bad_pivot) + } + if direct != .forward && direct != .backward { + panic(bad_direct) + } + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + if side == .left { + if c.len < m - 1 { + panic(short_c) + } + if s.len < m - 1 { + panic(short_s) + } + } else { + if c.len < n - 1 { + panic(short_c) + } + if s.len < n - 1 { + panic(short_s) + } + } + if a.len < (m - 1) * lda + n { + panic(short_a) + } + + if side == .left { + if pivot == .variable { + if direct == .forward { + for j := 0; j < m - 1; j++ { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp2 := a[j * lda + i] + tmp := a[(j + 1) * lda + i] + a[(j + 1) * lda + i] = ctmp * tmp - stmp * tmp2 + a[j * lda + i] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } + for j := m - 2; j >= 0; j-- { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp2 := a[j * lda + i] + tmp := a[(j + 1) * lda + i] + a[(j + 1) * lda + i] = ctmp * tmp - stmp * tmp2 + a[j * lda + i] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } else if pivot == .top { + if direct == .forward { + for j := 1; j < m; j++ { + ctmp := c[j - 1] + stmp := s[j - 1] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp := a[j * lda + i] + tmp2 := a[i] + a[j * lda + i] = ctmp * tmp - stmp * tmp2 + a[i] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } + for j := m - 1; j >= 1; j-- { + ctmp := c[j - 1] + stmp := s[j - 1] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp := a[j * lda + i] + tmp2 := a[i] + a[j * lda + i] = ctmp * tmp - stmp * tmp2 + a[i] = stmp * tmp + ctmp * tmp2 
+ } + } + } + return + } + if direct == .forward { + for j := 0; j < m - 1; j++ { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp := a[j * lda + i] + tmp2 := a[(m - 1) * lda + i] + a[j * lda + i] = stmp * tmp2 + ctmp * tmp + a[(m - 1) * lda + i] = ctmp * tmp2 - stmp * tmp + } + } + } + return + } + for j := m - 2; j >= 0; j-- { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp := a[j * lda + i] + tmp2 := a[(m - 1) * lda + i] + a[j * lda + i] = stmp * tmp2 + ctmp * tmp + a[(m - 1) * lda + i] = ctmp * tmp2 - stmp * tmp + } + } + } + return + } + if pivot == .variable { + if direct == .forward { + for j := 0; j < n - 1; j++ { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < m; i++ { + tmp := a[i * lda + j + 1] + tmp2 := a[i * lda + j] + a[i * lda + j + 1] = ctmp * tmp - stmp * tmp2 + a[i * lda + j] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } + for j := n - 2; j >= 0; j-- { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < m; i++ { + tmp := a[i * lda + j + 1] + tmp2 := a[i * lda + j] + a[i * lda + j + 1] = ctmp * tmp - stmp * tmp2 + a[i * lda + j] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } else if pivot == .top { + if direct == .forward { + for j := 1; j < n; j++ { + ctmp := c[j - 1] + stmp := s[j - 1] + if ctmp != 1 || stmp != 0 { + for i := 0; i < m; i++ { + tmp := a[i * lda + j] + tmp2 := a[i * lda] + a[i * lda + j] = ctmp * tmp - stmp * tmp2 + a[i * lda] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } + for j := n - 1; j >= 1; j-- { + ctmp := c[j - 1] + stmp := s[j - 1] + if ctmp != 1 || stmp != 0 { + for i := 0; i < m; i++ { + tmp := a[i * lda + j] + tmp2 := a[i * lda] + a[i * lda + j] = ctmp * tmp - stmp * tmp2 + a[i * lda] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } + if direct == .forward { + for j := 0; j < n - 1; j++ { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 
0; i < m; i++ { + tmp := a[i * lda + j] + tmp2 := a[i * lda + n - 1] + a[i * lda + j] = stmp * tmp2 + ctmp * tmp + a[i * lda + n - 1] = ctmp * tmp2 - stmp * tmp + } + } + } + return + } + for j := n - 2; j >= 0; j-- { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < m; i++ { + tmp := a[i * lda + j] + tmp2 := a[i * lda + n - 1] + a[i * lda + j] = stmp * tmp2 + ctmp * tmp + a[i * lda + n - 1] = ctmp * tmp2 - stmp * tmp + } + } + } +} diff --git a/lapack/lapack64/dlatrd.v b/lapack/lapack64/dlatrd.v new file mode 100644 index 000000000..3469624d5 --- /dev/null +++ b/lapack/lapack64/dlatrd.v @@ -0,0 +1,108 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dlatrd(uplo blas.Uplo, n int, nb int, mut a []f64, lda int, mut e []f64, mut tau []f64, mut w []f64, ldw int) { + if uplo != .upper && uplo != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if nb < 0 { + panic(nb_lt0) + } + if nb > n { + panic(nb_gtn) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if ldw < math.max(1, nb) { + panic(bad_ld_w) + } + + if n == 0 { + return + } + + if a.len < (n - 1) * lda + n { + panic(short_a) + } + if w.len < (n - 1) * ldw + nb { + panic(short_w) + } + if e.len < n - 1 { + panic(short_e) + } + if tau.len < n - 1 { + panic(short_tau) + } + + if uplo == .upper { + for i := n - 1; i >= n - nb; i-- { + iw := i - n + nb + if i < n - 1 { + // Update A(0:i, i). + blas.dgemv(.no_trans, i + 1, n - i - 1, -1, a[i + 1..], lda, w[i * ldw + iw + 1..], + 1, 1, mut a[i..], lda) + blas.dgemv(.no_trans, i + 1, n - i - 1, -1, w[iw + 1..], ldw, a[i * lda + i + 1..], + 1, 1, mut a[i..], lda) + } + if i > 0 { + // Generate elementary reflector H_i to annihilate A(0:i-2,i). + e[i - 1], tau[i - 1] = dlarfg(i, a[(i - 1) * lda + i], mut a[i..], lda) + a[(i - 1) * lda + i] = 1 + + // Compute W(0:i-1, i). 
+ blas.dsymv(.upper, i, 1, a, lda, a[i..], lda, 0, mut w[iw..], ldw) + if i < n - 1 { + blas.dgemv(.trans, i, n - i - 1, 1, w[iw + 1..], ldw, a[i..], lda, + 0, mut w[(i + 1) * ldw + iw..], ldw) + blas.dgemv(.no_trans, i, n - i - 1, -1, a[i + 1..], lda, w[(i + 1) * ldw + iw..], + ldw, 1, mut w[iw..], ldw) + blas.dgemv(.trans, i, n - i - 1, 1, a[i + 1..], lda, a[i..], lda, + 0, mut w[(i + 1) * ldw + iw..], ldw) + blas.dgemv(.no_trans, i, n - i - 1, -1, w[iw + 1..], ldw, w[(i + 1) * ldw + iw..], + ldw, 1, mut w[iw..], ldw) + } + blas.dscal(i, tau[i - 1], mut w[iw..], ldw) + alpha := -0.5 * tau[i - 1] * blas.ddot(i, w[iw..], ldw, a[i..], lda) + blas.daxpy(i, alpha, a[i..], lda, mut w[iw..], ldw) + } + } + } else { + // Reduce first nb columns of lower triangle. + for i := 0; i < nb; i++ { + // Update A(i:n, i) + blas.dgemv(.no_trans, n - i, i, -1, a[i * lda..], lda, w[i * ldw..], 1, 1, mut + a[i * lda + i..], lda) + blas.dgemv(.no_trans, n - i, i, -1, w[i * ldw..], ldw, a[i * lda..], 1, 1, mut + a[i * lda + i..], lda) + if i < n - 1 { + // Generate elementary reflector H_i to annihilate A(i+2:n,i). + e[i], tau[i] = dlarfg(n - i - 1, a[(i + 1) * lda + i], mut a[math.min(i + + 2, n - 1) * lda + i..], lda) + a[(i + 1) * lda + i] = 1 + + // Compute W(i+1:n,i). 
+ blas.dsymv(.lower, n - i - 1, 1, a[(i + 1) * lda + i + 1..], lda, a[(i + 1) * lda + + i..], lda, 0, mut w[(i + 1) * ldw + i..], ldw) + blas.dgemv(.trans, n - i - 1, i, 1, w[(i + 1) * ldw..], ldw, a[(i + 1) * lda + i..], + lda, 0, mut w[i..], ldw) + blas.dgemv(.no_trans, n - i - 1, i, -1, a[(i + 1) * lda..], lda, w[i..], + ldw, 1, mut w[(i + 1) * ldw + i..], ldw) + blas.dgemv(.trans, n - i - 1, i, 1, a[(i + 1) * lda..], lda, a[(i + 1) * lda + i..], + lda, 0, mut w[i..], ldw) + blas.dgemv(.no_trans, n - i - 1, i, -1, w[(i + 1) * ldw..], ldw, w[i..], + ldw, 1, mut w[(i + 1) * ldw + i..], ldw) + blas.dscal(n - i - 1, tau[i], mut w[(i + 1) * ldw + i..], ldw) + alpha := -0.5 * tau[i] * blas.ddot(n - i - 1, w[(i + 1) * ldw + i..], + ldw, a[(i + 1) * lda + i..], lda) + blas.daxpy(n - i - 1, alpha, a[(i + 1) * lda + i..], lda, mut w[(i + 1) * ldw + i..], + ldw) + } + } + } +} diff --git a/lapack/lapack64/dorg2l.v b/lapack/lapack64/dorg2l.v index 2c2070c79..c11963980 100644 --- a/lapack/lapack64/dorg2l.v +++ b/lapack/lapack64/dorg2l.v @@ -64,7 +64,7 @@ pub fn dorg2l(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f // Apply H_i to A[0:m-k+i, 0:n-k+i] from the left. 
a[(m - n + ii) * lda + ii] = 1 - dlarf(.left, m - n + ii + 1, ii, mut a[ii..], lda, tau[i], mut a, lda, mut work) + dlarf(.left, m - n + ii + 1, ii, a[ii..], lda, tau[i], mut a, lda, mut work) blas.dscal(m - n + ii, -tau[i], mut a[ii..], lda) a[(m - n + ii) * lda + ii] = 1 - tau[i] diff --git a/lapack/lapack64/dorg2r.v b/lapack/lapack64/dorg2r.v new file mode 100644 index 000000000..6947b7fb4 --- /dev/null +++ b/lapack/lapack64/dorg2r.v @@ -0,0 +1,66 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dorg2r(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f64) { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if n > m { + panic(n_gtm) + } + if k < 0 { + panic(k_lt0) + } + if k > n { + panic(k_gtn) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + if n == 0 { + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + if tau.len != k { + panic(bad_len_tau) + } + if work.len < n { + panic(short_work) + } + + // Initialize columns k+1:n to columns of the unit matrix. + for l := 0; l < m; l++ { + for j := k; j < n; j++ { + a[l * lda + j] = 0.0 + } + } + for j := k; j < n; j++ { + a[j * lda + j] = 1.0 + } + for i := k - 1; i >= 0; i-- { + for mut elem in work { + elem = 0.0 + } + if i < n - 1 { + a[i * lda + i] = 1.0 + dlarf(.left, m - i, n - i - 1, a[(i * lda + i)..], lda, tau[i], mut a[(i * lda + i + 1)..], + lda, mut work) + } + if i < m - 1 { + blas.dscal(m - i - 1, -tau[i], mut a[(i + 1) * lda + i..], lda) + } + a[i * lda + i] = 1.0 - tau[i] + for l := 0; l < i; l++ { + a[l * lda + i] = 0.0 + } + } +} diff --git a/lapack/lapack64/dorgql.v b/lapack/lapack64/dorgql.v index b16636e57..92b28cf5a 100644 --- a/lapack/lapack64/dorgql.v +++ b/lapack/lapack64/dorgql.v @@ -116,8 +116,8 @@ pub fn dorgql(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f if n - k + i > 0 { // Form the triangular factor of the block reflector // H = H_{i+ib-1} * ... * H_{i+1} * H_i. 
- dlarft(.backward, .column_wise, m - k + i + ib, ib, mut a[n - k + i..], - lda, tau[i..], mut work, ldwork) + dlarft(.backward, .column_wise, m - k + i + ib, ib, a[n - k + i..], lda, + tau[i..], mut work, ldwork) // Apply H to A[0:m-k+i+ib, 0:n-k+i] from the left. dlarfb(.left, .no_trans, .backward, .column_wise, m - k + i + ib, n - k + i, diff --git a/lapack/lapack64/dorgqr.v b/lapack/lapack64/dorgqr.v new file mode 100644 index 000000000..d2d14eb51 --- /dev/null +++ b/lapack/lapack64/dorgqr.v @@ -0,0 +1,104 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dorgqr(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f64, lwork int) { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if n > m { + panic(n_gtm) + } + if k < 0 { + panic(k_lt0) + } + if k > n { + panic(k_gtn) + } + if lda < math.max(1, n) && lwork != -1 { + panic(bad_ld_a) + } + if lwork < math.max(1, n) && lwork != -1 { + panic(bad_l_work) + } + if work.len < math.max(1, lwork) { + panic(short_work) + } + + if n == 0 { + work[0] = 1.0 + return + } + + mut nb := ilaenv(1, 'DORGQR', ' ', m, n, k, -1) + if lwork == -1 { + work[0] = f64(n * nb) + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + if tau.len != k { + panic(bad_len_tau) + } + + mut nbmin := 2 + mut nx := 0 + mut iws := n + mut ldwork := 0 + if 1 < nb && nb < k { + nx = math.max(0, ilaenv(3, 'DORGQR', ' ', m, n, k, -1)) + if nx < k { + ldwork = nb + iws = n * ldwork + if lwork < iws { + nb = lwork / n + ldwork = nb + nbmin = math.max(2, ilaenv(2, 'DORGQR', ' ', m, n, k, -1)) + } + } + } + mut ki := 0 + mut kk := 0 + if nbmin <= nb && nb < k && nx < k { + ki = ((k - nx - 1) / nb) * nb + kk = math.min(k, ki + nb) + for i := 0; i < kk; i++ { + for j := kk; j < n; j++ { + unsafe { + a[i * lda + j] = 0.0 + } + } + } + } + if kk < n { + dorg2r(m - kk, n - kk, k - kk, mut a[(kk * lda + kk)..], lda, tau[kk..], mut work) + } + if kk > 0 { + for i := ki; i >= 0; i -= nb { + ib := math.min(nb, 
k - i) + if i + ib < n { + dlarft(.forward, .column_wise, m - i, ib, a[(i * lda + i)..], lda, tau[i..], mut + work, ldwork) + + dlarfb(.left, .no_trans, .forward, .column_wise, m - i, n - i - ib, ib, + a[(i * lda + i)..], lda, work, ldwork, mut a[(i * lda + i + ib)..], + lda, mut work[(ib * ldwork)..], ldwork) + } + dorg2r(m - i, ib, ib, mut a[(i * lda + i)..], lda, tau[i..(i + ib)], mut work) + for j := i; j < i + ib; j++ { + for l := 0; l < i; l++ { + unsafe { + a[l * lda + j] = 0.0 + } + } + } + } + } + work[0] = f64(iws) +} diff --git a/lapack/lapack64/dsteqr.v b/lapack/lapack64/dsteqr.v new file mode 100644 index 000000000..ce61a1f63 --- /dev/null +++ b/lapack/lapack64/dsteqr.v @@ -0,0 +1,375 @@ +module lapack64 + +import math +import vsl.blas + +// dsteqr computes the eigenvalues and optionally the eigenvectors of a symmetric +// tridiagonal matrix using the implicit QL or QR method. The eigenvectors of a +// full or band symmetric matrix can also be found if dsytrd, dsptrd, or dsbtrd +// have been used to reduce this matrix to tridiagonal form. +// +// d, on entry, contains the diagonal elements of the tridiagonal matrix. On exit, +// d contains the eigenvalues in ascending order. d must have length n and +// dsteqr will panic otherwise. +// +// e, on entry, contains the off-diagonal elements of the tridiagonal matrix on +// entry, and is overwritten during the call to dsteqr. e must have length n-1 and +// dsteqr will panic otherwise. +// +// z, on entry, contains the n×n orthogonal matrix used in the reduction to +// tridiagonal form if compz == lapack.EVOrig. On exit, if +// compz == lapack.EVOrig, z contains the orthonormal eigenvectors of the +// original symmetric matrix, and if compz == lapack.EVTridiag, z contains the +// orthonormal eigenvectors of the symmetric tridiagonal matrix. z is not used +// if compz == lapack.EVCompNone. 
+// +// work must have length at least max(1, 2*n-2) if the eigenvectors are computed, +// and dsteqr will panic otherwise. +// +// dsteqr is an internal routine. It is exported for testing purposes. +pub fn dsteqr(compz EVComp, n int, mut d []f64, mut e []f64, mut z []f64, ldz int, mut work []f64) bool { + if compz != .ev_comp_none && compz != .ev_tridiag && compz != .ev_orig { + panic('bad_ev_comp') + } + if n < 0 { + panic('n < 0') + } + if ldz < 1 || (compz != .ev_comp_none && ldz < n) { + panic('bad_ldz') + } + + // Quick return if possible. + if n == 0 { + return true + } + + if d.len < n { + panic('short d') + } + if e.len < n - 1 { + panic('short e') + } + if compz != .ev_comp_none && z.len < (n - 1) * ldz + n { + panic('short z') + } + if compz != .ev_comp_none && work.len < math.max(1, 2 * n - 2) { + panic('short work') + } + + mut icompz := 0 + if compz == .ev_orig { + icompz = 1 + } else if compz == .ev_tridiag { + icompz = 2 + } + + if n == 1 { + if icompz == 2 { + z[0] = 1 + } + return true + } + + eps := dlamch_e + eps2 := eps * eps + safmin := dlamch_s + safmax := 1 / safmin + ssfmax := math.sqrt(safmax) / 3 + ssfmin := math.sqrt(safmin) / eps2 + + // Compute the eigenvalues and eigenvectors of the tridiagonal matrix. + if icompz == 2 { + dlaset(.all, n, n, 0, 1, mut z, ldz) + } + maxit := 30 + nmaxit := n * maxit + + mut jtot := 0 + + // Determine where the matrix splits and choose QL or QR iteration for each + // block, according to whether top or bottom diagonal element is smaller. + mut l1 := 0 + nm1 := n - 1 + + down := 1 + up := 2 + mut iscale := 0 + + for { + if l1 > n - 1 { + // Order eigenvalues and eigenvectors. 
+ if icompz == 0 { + dlasrt(.sort_increasing, n, mut d) + } else { + for ii := 1; ii < n; ii++ { + i := ii - 1 + mut k := i + mut p := d[i] + for j := ii; j < n; j++ { + if d[j] < p { + k = j + p = d[j] + } + } + if k != i { + d[k] = d[i] + d[i] = p + blas.dswap(n, mut z[i..], ldz, mut z[k..], ldz) + } + } + } + return true + } + if l1 > 0 { + e[l1 - 1] = 0 + } + mut m := 0 + if l1 <= nm1 { + for m = l1; m < nm1; m++ { + test := math.abs(e[m]) + if test == 0 { + break + } + if test <= (math.sqrt(math.abs(d[m])) * math.sqrt(math.abs(d[m + 1]))) * eps { + e[m] = 0 + break + } + } + } + mut l := l1 + lsv := l + mut lend := m + lendsv := lend + l1 = m + 1 + if lend == l { + continue + } + + // Scale submatrix in rows and columns L to Lend + anorm := dlanst(.max_abs, lend - l + 1, d[l..], e[l..]) + match anorm { + 0 { + continue + } + ssfmax { + iscale = down + // Pretend that d and e are matrices with 1 column. + dlascl(.general, 0, 0, anorm, ssfmax, lend - l + 1, 1, mut d[l..], 1) + dlascl(.general, 0, 0, anorm, ssfmax, lend - l, 1, mut e[l..], 1) + } + ssfmin { + iscale = up + dlascl(.general, 0, 0, anorm, ssfmin, lend - l + 1, 1, mut d[l..], 1) + dlascl(.general, 0, 0, anorm, ssfmin, lend - l, 1, mut e[l..], 1) + } + else {} + } + + // Choose between QL and QR. + if math.abs(d[lend]) < math.abs(d[l]) { + lend = lsv + l = lendsv + } + if lend > l { + // QL Iteration. Look for small subdiagonal element. + for { + if l != lend { + for m = l; m < lend; m++ { + v := math.abs(e[m]) + if v * v <= (eps2 * math.abs(d[m])) * math.abs(d[m + 1]) + safmin { + break + } + } + } else { + m = lend + } + if m < lend { + e[m] = 0 + } + mut p := d[l] + if m == l { + // Eigenvalue found. + l++ + if l > lend { + break + } + continue + } + + // If remaining matrix is 2×2, use dlaev2 to compute its eigensystem. 
+ if m == l + 1 { + if icompz > 0 { + d[l], d[l + 1], work[l], work[n - 1 + l] = dlaev2(d[l], e[l], + d[l + 1]) + dlasr(.right, .variable, .backward, n, 2, work[l..], work[n - 1 + l..], mut + z[l..], ldz) + } else { + d[l], d[l + 1] = dlae2(d[l], e[l], d[l + 1]) + } + e[l] = 0 + l += 2 + if l > lend { + break + } + continue + } + + if jtot == nmaxit { + break + } + jtot++ + + // Form shift + mut g := (d[l + 1] - p) / (2 * e[l]) + mut r := dlapy2(g, 1) + g = d[m] - p + e[l] / (g + math.copysign(r, g)) + mut s := 1.0 + mut c := 1.0 + p = 0.0 + + // Inner loop + for i := m - 1; i >= l; i-- { + f := s * e[i] + b := c * e[i] + c, s, r = dlartg(g, f) + if i != m - 1 { + e[i + 1] = r + } + g = d[i + 1] - p + r = (d[i] - g) * s + 2 * c * b + p = s * r + d[i + 1] = g + p + g = c * r - b + + // If eigenvectors are desired, then save rotations. + if icompz > 0 { + work[i] = c + work[n - 1 + i] = -s + } + } + // If eigenvectors are desired, then apply saved rotations. + if icompz > 0 { + mm := m - l + 1 + dlasr(.right, .variable, .backward, n, mm, work[l..], work[n - 1 + l..], mut + z[l..], ldz) + } + d[l] -= p + e[l] = g + } + } else { + // QR Iteration. + // Look for small superdiagonal element. + for { + if l != lend { + for m = l; m > lend; m-- { + v := math.abs(e[m - 1]) + if v * v <= (eps2 * math.abs(d[m]) * math.abs(d[m - 1]) + safmin) { + break + } + } + } else { + m = lend + } + if m > lend { + e[m - 1] = 0 + } + mut p := d[l] + if m == l { + // Eigenvalue found + l-- + if l < lend { + break + } + continue + } + + // If remaining matrix is 2×2, use dlae2 to compute its eigenvalues. 
+ if m == l - 1 { + if icompz > 0 { + d[l - 1], d[l], work[m], work[n - 1 + m] = dlaev2(d[l - 1], e[l - 1], + d[l]) + dlasr(.right, .variable, .forward, n, 2, work[m..], work[n - 1 + m..], mut + z[l - 1..], ldz) + } else { + d[l - 1], d[l] = dlae2(d[l - 1], e[l - 1], d[l]) + } + e[l - 1] = 0 + l -= 2 + if l < lend { + break + } + continue + } + if jtot == nmaxit { + break + } + jtot++ + + // Form shift. + mut g := (d[l - 1] - p) / (2 * e[l - 1]) + mut r := dlapy2(g, 1) + g = d[m] - p + (e[l - 1]) / (g + math.copysign(r, g)) + mut s := 1.0 + mut c := 1.0 + p = 0.0 + + // Inner loop. + for i := m; i < l; i++ { + f := s * e[i] + b := c * e[i] + c, s, r = dlartg(g, f) + if i != m { + e[i - 1] = r + } + g = d[i] - p + r = (d[i + 1] - g) * s + 2 * c * b + p = s * r + d[i] = g + p + g = c * r - b + + // If eigenvectors are desired, then save rotations. + if icompz > 0 { + work[i] = c + work[n - 1 + i] = s + } + } + + // If eigenvectors are desired, then apply saved rotations. + if icompz > 0 { + mm := l - m + 1 + dlasr(.right, .variable, .forward, n, mm, work[m..], work[n - 1 + m..], mut + z[m..], ldz) + } + d[l] -= p + e[l - 1] = g + } + } + + // Undo scaling if necessary. + match iscale { + down { + // Pretend that d and e are matrices with 1 column. + dlascl(.general, 0, 0, ssfmax, anorm, lendsv - lsv + 1, 1, mut d[lsv..], + 1) + dlascl(.general, 0, 0, ssfmax, anorm, lendsv - lsv, 1, mut e[lsv..], 1) + } + up { + dlascl(.general, 0, 0, ssfmin, anorm, lendsv - lsv + 1, 1, mut d[lsv..], + 1) + dlascl(.general, 0, 0, ssfmin, anorm, lendsv - lsv, 1, mut e[lsv..], 1) + } + else {} + } + + // Check for no convergence to an eigenvalue after a total of n*maxit iterations. 
+ if jtot >= nmaxit { + break + } + } + for i := 0; i < n - 1; i++ { + if e[i] != 0 { + return false + } + } + return true +} diff --git a/lapack/lapack64/dsyev.v b/lapack/lapack64/dsyev.v index a65f019fa..12cf3638b 100644 --- a/lapack/lapack64/dsyev.v +++ b/lapack/lapack64/dsyev.v @@ -88,8 +88,8 @@ pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, mut w []f6 panic('Dsterf failed') } } else { - dorgtr(uplo, n, mut a, lda, mut work[indtau..], mut work[indwork..], llwork) - if !dsteqr(EvComp(jobz), n, mut w, mut work[inde..], mut a, lda, mut work[indtau..]) { + dorgtr(uplo, n, mut a, lda, work[indtau..], mut work[indwork..], llwork) + if !dsteqr(EVComp(jobz), n, mut w, mut work[inde..], mut a, lda, mut work[indtau..]) { panic('Dsteqr failed') } } diff --git a/lapack/lapack64/dsytd2.v b/lapack/lapack64/dsytd2.v new file mode 100644 index 000000000..485440d7e --- /dev/null +++ b/lapack/lapack64/dsytd2.v @@ -0,0 +1,144 @@ +module lapack64 + +import math +import vsl.blas + +// Dsytd2 reduces a symmetric n×n matrix A to symmetric tridiagonal form T by +// an orthogonal similarity transformation +// +// Qᵀ * A * Q = T +// +// On entry, the matrix is contained in the specified triangle of a. On exit, +// if uplo == Uplo.upper, the diagonal and first super-diagonal of a are +// overwritten with the elements of T. The elements above the first super-diagonal +// are overwritten with the elementary reflectors that are used with +// the elements written to tau in order to construct Q. If uplo == Uplo.lower, +// the elements are written in the lower triangular region. +// +// d must have length at least n. e and tau must have length at least n-1. Dsytd2 +// will panic if these sizes are not met. +// +// Q is represented as a product of elementary reflectors. +// If uplo == Uplo.upper +// +// Q = H_{n-2} * ... * H_1 * H_0 +// +// and if uplo == Uplo.lower +// +// Q = H_0 * H_1 * ... 
* H_{n-2} +// +// where +// +// H_i = I - tau * v * vᵀ +// +// where tau is stored in tau[i], and v is stored in a. +// +// If uplo == Uplo.upper, v[0:i-1] is stored in A[0:i-1,i+1], v[i] = 1, and +// v[i+1:] = 0. The elements of a are +// +// [ d e v2 v3 v4] +// [ d e v3 v4] +// [ d e v4] +// [ d e] +// [ d] +// +// If uplo == Uplo.lower, v[0:i+1] = 0, v[i+1] = 1, and v[i+2:] is stored in +// A[i+2:n,i]. +// The elements of a are +// +// [ d ] +// [ e d ] +// [v1 e d ] +// [v1 v2 e d ] +// [v1 v2 v3 e d] +// +pub fn dsytd2(uplo blas.Uplo, n int, mut a []f64, lda int, mut d []f64, mut e []f64, mut tau []f64) { + if uplo != .upper && uplo != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + // Quick return if possible. + if n == 0 { + return + } + + if a.len < (n - 1) * lda + n { + panic(short_a) + } + if d.len < n { + panic(short_d) + } + if e.len < n - 1 { + panic(short_e) + } + if tau.len < n - 1 { + panic(short_tau) + } + + if uplo == .upper { + // Reduce the upper triangle of A. + for i := n - 2; i >= 0; i-- { + // Generate elementary reflector H_i = I - tau * v * vᵀ to + // annihilate A[i:i-1, i+1]. + taui, _ := dlarfg(i + 1, a[i * lda + i + 1], mut a[0 + i + 1..], lda) + e[i] = a[i * lda + i + 1] + if taui != 0.0 { + // Apply H_i from both sides to A[0:i,0:i]. + a[i * lda + i + 1] = 1.0 + + // Compute x := tau * A * v storing x in tau[0:i]. + blas.dsymv(.upper, i + 1, taui, a, lda, a[i + 1..], lda, 0, mut tau, 1) + + // Compute w := x - 1/2 * tau * (xᵀ * v) * v. + alpha := -0.5 * taui * blas.ddot(i + 1, tau, 1, a[i + 1..], lda) + blas.daxpy(i + 1, alpha, a[i + 1..], lda, mut tau, 1) + + // Apply the transformation as a rank-2 update + // A = A - v * wᵀ - w * vᵀ. + blas.dsyr2(.upper, i + 1, -1.0, a[i + 1..], lda, tau, 1, mut a, lda) + a[i * lda + i + 1] = e[i] + } + d[i + 1] = a[(i + 1) * lda + i + 1] + tau[i] = taui + } + d[0] = a[0] + } else { + // Reduce the lower triangle of A. 
+ for i := 0; i < n - 1; i++ { + // Generate elementary reflector H_i = I - tau * v * vᵀ to + // annihilate A[i+2:n, i]. + taui, _ := dlarfg(n - i - 1, a[(i + 1) * lda + i], mut a[math.min(i + 2, n - 1) * lda + + i..], lda) + e[i] = a[(i + 1) * lda + i] + if taui != 0.0 { + // Apply H_i from both sides to A[i+1:n, i+1:n]. + a[(i + 1) * lda + i] = 1.0 + + // Compute x := tau * A * v, storing y in tau[i:n-1]. + blas.dsymv(.lower, n - i - 1, taui, a[(i + 1) * lda + i + 1..], lda, a[(i + + 1) * lda + i..], lda, 0, mut tau[i..], 1) + + // Compute w := x - 1/2 * tau * (xᵀ * v) * v. + alpha := -0.5 * taui * blas.ddot(n - i - 1, tau[i..], 1, a[(i + 1) * lda + i..], + lda) + blas.daxpy(n - i - 1, alpha, a[(i + 1) * lda + i..], lda, mut tau[i..], + 1) + + // Apply the transformation as a rank-2 update + // A = A - v * wᵀ - w * vᵀ. + blas.dsyr2(.lower, n - i - 1, -1.0, a[(i + 1) * lda + i..], lda, tau[i..], + 1, mut a[(i + 1) * lda + i + 1..], lda) + a[(i + 1) * lda + i] = e[i] + } + d[i] = a[i * lda + i] + tau[i] = taui + } + d[n - 1] = a[(n - 1) * lda + n - 1] + } +} diff --git a/lapack/lapack64/errors.v b/lapack/lapack64/errors.v index 91a96995f..642ed523a 100644 --- a/lapack/lapack64/errors.v +++ b/lapack/lapack64/errors.v @@ -173,6 +173,6 @@ pub const bad_ld_z = 'lapack: bad leading dimension of Z' // Panic strings for bad vector increments. pub const abs_inc_not_one = 'lapack: increment not one or negative one' -pub const bad_inc_x = 'lapack: incXpub const <= 0' -pub const bad_inc_y = 'lapack: incYpub const <= 0' -pub const zero_inc_v = 'lapack:pub const incvpub const == 0' +pub const bad_inc_x = 'lapack: incx <= 0' +pub const bad_inc_y = 'lapack: incy <= 0' +pub const zero_inc_v = 'lapack: incv == 0' diff --git a/lapack/lapack64/iladlc.v b/lapack/lapack64/iladlc.v new file mode 100644 index 000000000..893509b1a --- /dev/null +++ b/lapack/lapack64/iladlc.v @@ -0,0 +1,42 @@ +module lapack64 + +import math + +// iladlc scans a matrix for its last non-zero column. 
Returns -1 if the matrix +// is all zeros. +pub fn iladlc(m int, n int, a []f64, lda int) int { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + if n == 0 || m == 0 { + return -1 + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + + // Test common case where corner is non-zero. + if a[n - 1] != 0 || a[(m - 1) * lda + (n - 1)] != 0 { + return n - 1 + } + + // Scan each row tracking the highest column seen. + mut highest := -1 + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + if a[i * lda + j] != 0 { + highest = math.max(highest, j) + break + } + } + } + return highest +} diff --git a/lapack/lapack64/iladlr.v b/lapack/lapack64/iladlr.v new file mode 100644 index 000000000..a412219f2 --- /dev/null +++ b/lapack/lapack64/iladlr.v @@ -0,0 +1,38 @@ +module lapack64 + +import math + +// iladlr scans a matrix for its last non-zero row. Returns -1 if the matrix +// is all zeros. +pub fn iladlr(m int, n int, a []f64, lda int) int { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + if n == 0 || m == 0 { + return -1 + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + + // Check the common case where the corner is non-zero + if a[(m - 1) * lda] != 0 || a[(m - 1) * lda + n - 1] != 0 { + return m - 1 + } + for i := m - 1; i >= 0; i-- { + for j := 0; j < n; j++ { + if a[i * lda + j] != 0 { + return i + } + } + } + return -1 +} From e83de293d661b7036e276105c5cc2ac10a900964 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 04:06:14 -0300 Subject: [PATCH 28/33] refactor: Update LAPACK functions in lapack_notd_vsl_lapack_lapacke.v to handle errors and use named constants --- lapack/cflags_notd_vsl_lapack_lapacke.v | 14 -------------- lapack/lapack_notd_vsl_lapack_lapacke.v | 8 ++++---- 2 files changed, 4 insertions(+), 18 deletions(-) delete mode 100644 lapack/cflags_notd_vsl_lapack_lapacke.v diff 
--git a/lapack/cflags_notd_vsl_lapack_lapacke.v b/lapack/cflags_notd_vsl_lapack_lapacke.v deleted file mode 100644 index 081f788db..000000000 --- a/lapack/cflags_notd_vsl_lapack_lapacke.v +++ /dev/null @@ -1,14 +0,0 @@ -module lapack - -#flag linux -O2 -I/usr/local/include -I/usr/lib -#flag linux -L/usr/local/lib -L/usr/lib -#flag windows -O2 -#flag windows -lgfortran -// Intel, M1 brew, and MacPorts -#flag darwin -L/usr/local/opt/lapack/lib -L/opt/homebrew/opt/lapack/lib -L/opt/local/opt/lapack/lib -#flag -I@VMODROOT -#flag -llapacke - -$if macos { - #include -} diff --git a/lapack/lapack_notd_vsl_lapack_lapacke.v b/lapack/lapack_notd_vsl_lapack_lapacke.v index e16020046..e46de9120 100644 --- a/lapack/lapack_notd_vsl_lapack_lapacke.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -55,7 +55,7 @@ pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s [ info := lapack64.dgesvd(jobu, jobvt, m, n, mut a, lda, s, mut u, ldu, mut vt, ldvt, superb) if info != 0 { - errors.vsl_panic('lapack failed', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) } } @@ -90,7 +90,7 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { info := lapack64.dgetri(n, mut a, lda, mut ipiv) if info != 0 { - errors.vsl_panic('lapack failed', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) } } @@ -114,7 +114,7 @@ pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { info := lapack64.dpotrf(uplo, n, mut a, lda) if info != 0 { - errors.vsl_panic('lapack failed', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) } } @@ -163,6 +163,6 @@ pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, info := lapack64.dgeev(calc_vl, calc_vr, n, mut a, lda, wr, wi, mut vl, ldvl, mut vr, ldvr) if info != 0 { - 
errors.vsl_panic('lapack failed', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) } } From b06e436dc5812c305c671776b6b1092b8850e6c9 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 04:10:48 -0300 Subject: [PATCH 29/33] refactor: Update ci.yml to execute tests using Pure V Backend --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1cdfa15e6..2761553b2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,8 +64,8 @@ jobs: - name: Move VSL source code to V Modules run: mv ./vsl ~/.vmodules - # - name: Execute Tests using Pure V Backend - # run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} + - name: Execute Tests using Pure V Backend + run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} - name: Execute Tests using Pure V Backend with Pure C Backend run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas --use-lapacke From 36ae80e00f12ae032447678cc1401af4d0751454 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 04:13:54 -0300 Subject: [PATCH 30/33] refactor: Update ci.yml to execute tests using Pure V Backend with CBLAS and LAPACKE --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2761553b2..34dc5c559 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,7 +65,7 @@ jobs: run: mv ./vsl ~/.vmodules - name: Execute Tests using Pure V Backend - run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} + run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas - name: Execute Tests using Pure V Backend with Pure C Backend run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas --use-lapacke From 63adcf6a867277f3a0bb6c892e074b1eb506904b Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 13:05:56 -0300 Subject: [PATCH 31/33] 
refactor: Update ci.yml to execute tests using Pure V Backend with CBLAS and LAPACKE --- lapack/conversions.v | 24 ++++++++++----------- lapack/lapack64/conversions.v | 28 ++++++++++++------------- lapack/lapack64/dgeev.v | 2 +- lapack/lapack64/dgesvd.v | 2 +- lapack/lapack64/dsteqr.v | 10 ++++----- lapack/lapack64/dsyev.v | 5 +++-- lapack/lapack64/errors.v | 14 ++++++------- lapack/lapack64/ilaenv.v | 4 ++-- lapack/lapack_d_vsl_lapack_lapacke.v | 10 ++++----- lapack/lapack_notd_vsl_lapack_lapacke.v | 14 ++++++------- 10 files changed, 57 insertions(+), 56 deletions(-) diff --git a/lapack/conversions.v b/lapack/conversions.v index fcb1cb239..c58237c56 100644 --- a/lapack/conversions.v +++ b/lapack/conversions.v @@ -32,17 +32,17 @@ pub type SVDJob = lapack64.SVDJob // GSVDJob specifies the singular vector computation type for Generalized SVD. pub type GSVDJob = lapack64.GSVDJob -// EVComp specifies how eigenvectors are computed in Dsteqr. -pub type EVComp = lapack64.EVComp +// EigenVectorsComp specifies how eigenvectors are computed in Dsteqr. +pub type EigenVectorsComp = lapack64.EigenVectorsComp -// EVJob specifies whether eigenvectors are computed in Dsyev. -pub type EVJob = lapack64.EVJob +// EigenVectorsJob specifies whether eigenvectors are computed in Dsyev. +pub type EigenVectorsJob = lapack64.EigenVectorsJob -// LeftEVJob specifies whether left eigenvectors are computed in Dgeev. -pub type LeftEVJob = lapack64.LeftEVJob +// LeftEigenVectorsJob specifies whether left eigenvectors are computed in Dgeev. +pub type LeftEigenVectorsJob = lapack64.LeftEigenVectorsJob -// RightEVJob specifies whether right eigenvectors are computed in Dgeev. -pub type RightEVJob = lapack64.RightEVJob +// RightEigenVectorsJob specifies whether right eigenvectors are computed in Dgeev. +pub type RightEigenVectorsJob = lapack64.RightEigenVectorsJob // BalanceJob specifies matrix balancing operation. 
pub type BalanceJob = lapack64.BalanceJob @@ -56,11 +56,11 @@ pub type SchurComp = lapack64.SchurComp // UpdateSchurComp specifies whether the matrix of Schur vectors is updated in Dtrexc. pub type UpdateSchurComp = lapack64.UpdateSchurComp -// EVSide specifies what eigenvectors are computed in Dtrevc3. -pub type EVSide = lapack64.EVSide +// EigenVectorsSide specifies what eigenvectors are computed in Dtrevc3. +pub type EigenVectorsSide = lapack64.EigenVectorsSide -// EVHowMany specifies which eigenvectors are computed in Dtrevc3 and how. -pub type EVHowMany = lapack64.EVHowMany +// EigenVectorsHowMany specifies which eigenvectors are computed in Dtrevc3 and how. +pub type EigenVectorsHowMany = lapack64.EigenVectorsHowMany // MaximizeNormXJob specifies the heuristic method for computing a contribution to // the reciprocal Dif-estimate in Dlatdf. diff --git a/lapack/lapack64/conversions.v b/lapack/lapack64/conversions.v index 5052633f9..dbb33db5d 100644 --- a/lapack/lapack64/conversions.v +++ b/lapack/lapack64/conversions.v @@ -67,7 +67,7 @@ pub enum GenOrtho as u8 { generate_q = u8(`Q`) } -// SVDJob specifies the singular vector computation type for SVD. +// SVDJob specifies the singular vector computation type for SingularValueDecomposition. pub enum SVDJob as u8 { // Compute all columns of the orthogonal matrix U or V. svd_all = u8(`A`) @@ -79,7 +79,7 @@ pub enum SVDJob as u8 { svd_none = u8(`N`) } -// GSVDJob specifies the singular vector computation type for Generalized SVD. +// GSVDJob specifies the singular vector computation type for Generalized SingularValueDecomposition. pub enum GSVDJob as u8 { // Compute orthogonal matrix U. gsvd_u = u8(`U`) @@ -93,8 +93,8 @@ pub enum GSVDJob as u8 { gsvd_none = u8(`N`) } -// EVComp specifies how eigenvectors are computed in Dsteqr. -pub enum EVComp as u8 { +// EigenVectorsComp specifies how eigenvectors are computed in Dsteqr. +pub enum EigenVectorsComp as u8 { // Compute eigenvectors of the original symmetric matrix. 
ev_orig = u8(`V`) // Compute eigenvectors of the tridiagonal matrix. @@ -103,24 +103,24 @@ pub enum EVComp as u8 { ev_comp_none = u8(`N`) } -// EVJob specifies whether eigenvectors are computed in Dsyev. -pub enum EVJob as u8 { +// EigenVectorsJob specifies whether eigenvectors are computed in Dsyev. +pub enum EigenVectorsJob as u8 { // Compute eigenvectors. ev_compute = u8(`V`) // Do not compute eigenvectors. ev_none = u8(`N`) } -// LeftEVJob specifies whether left eigenvectors are computed in Dgeev. -pub enum LeftEVJob as u8 { +// LeftEigenVectorsJob specifies whether left eigenvectors are computed in Dgeev. +pub enum LeftEigenVectorsJob as u8 { // Compute left eigenvectors. left_ev_compute = u8(`V`) // Do not compute left eigenvectors. left_ev_none = u8(`N`) } -// RightEVJob specifies whether right eigenvectors are computed in Dgeev. -pub enum RightEVJob as u8 { +// RightEigenVectorsJob specifies whether right eigenvectors are computed in Dgeev. +pub enum RightEigenVectorsJob as u8 { // Compute right eigenvectors. right_ev_compute = u8(`V`) // Do not compute right eigenvectors. @@ -159,8 +159,8 @@ pub enum UpdateSchurComp as u8 { update_schur_none = u8(`N`) } -// EVSide specifies what eigenvectors are computed in Dtrevc3. -pub enum EVSide as u8 { +// EigenVectorsSide specifies what eigenvectors are computed in Dtrevc3. +pub enum EigenVectorsSide as u8 { // Compute only right eigenvectors. ev_right = u8(`R`) // Compute only left eigenvectors. @@ -169,8 +169,8 @@ pub enum EVSide as u8 { ev_both = u8(`B`) } -// EVHowMany specifies which eigenvectors are computed in Dtrevc3 and how. -pub enum EVHowMany as u8 { +// EigenVectorsHowMany specifies which eigenvectors are computed in Dtrevc3 and how. +pub enum EigenVectorsHowMany as u8 { // Compute all right and/or left eigenvectors. ev_all = u8(`A`) // Compute all right and/or left eigenvectors multiplied by an input matrix. 
diff --git a/lapack/lapack64/dgeev.v b/lapack/lapack64/dgeev.v index 38799070c..4ed16de5b 100644 --- a/lapack/lapack64/dgeev.v +++ b/lapack/lapack64/dgeev.v @@ -4,7 +4,7 @@ import math import vsl.blas // dgeev computes the eigenvalues and, optionally, the left and/or right eigenvectors for a real nonsymmetric matrix A. -pub fn dgeev(jobvl LeftEVJob, jobvr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl int, mut vr []f64, ldvr int) int { +pub fn dgeev(jobvl LeftEigenVectorsJob, jobvr LeftEigenVectorsJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl int, mut vr []f64, ldvr int) int { if n == 0 { return 0 } diff --git a/lapack/lapack64/dgesvd.v b/lapack/lapack64/dgesvd.v index a8f035e7d..aad7e51ae 100644 --- a/lapack/lapack64/dgesvd.v +++ b/lapack/lapack64/dgesvd.v @@ -3,7 +3,7 @@ module lapack64 import math import vsl.blas -// dgesvd computes the singular value decomposition (SVD) of a real matrix A. +// dgesvd computes the singular value decomposition (SingularValueDecomposition) of a real matrix A. pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s []f64, mut u []f64, ldu int, mut vt []f64, ldvt int, superb []f64) int { if m == 0 || n == 0 { return 0 diff --git a/lapack/lapack64/dsteqr.v b/lapack/lapack64/dsteqr.v index ce61a1f63..e9b2ab7a1 100644 --- a/lapack/lapack64/dsteqr.v +++ b/lapack/lapack64/dsteqr.v @@ -17,17 +17,17 @@ import vsl.blas // dsteqr will panic otherwise. // // z, on entry, contains the n×n orthogonal matrix used in the reduction to -// tridiagonal form if compz == lapack.EVOrig. On exit, if -// compz == lapack.EVOrig, z contains the orthonormal eigenvectors of the -// original symmetric matrix, and if compz == lapack.EVTridiag, z contains the +// tridiagonal form if compz == lapack.EigenVectorsOrig. 
On exit, if +// compz == lapack.EigenVectorsOrig, z contains the orthonormal eigenvectors of the +// original symmetric matrix, and if compz == lapack.EigenVectorsTridiag, z contains the // orthonormal eigenvectors of the symmetric tridiagonal matrix. z is not used -// if compz == lapack.EVCompNone. +// if compz == lapack.EigenVectorsCompNone. // // work must have length at least max(1, 2*n-2) if the eigenvectors are computed, // and dsteqr will panic otherwise. // // dsteqr is an internal routine. It is exported for testing purposes. -pub fn dsteqr(compz EVComp, n int, mut d []f64, mut e []f64, mut z []f64, ldz int, mut work []f64) bool { +pub fn dsteqr(compz EigenVectorsComp, n int, mut d []f64, mut e []f64, mut z []f64, ldz int, mut work []f64) bool { if compz != .ev_comp_none && compz != .ev_tridiag && compz != .ev_orig { panic('bad_ev_comp') } diff --git a/lapack/lapack64/dsyev.v b/lapack/lapack64/dsyev.v index 12cf3638b..ef995f40d 100644 --- a/lapack/lapack64/dsyev.v +++ b/lapack/lapack64/dsyev.v @@ -3,7 +3,7 @@ module lapack64 import math import vsl.blas -pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, mut w []f64, mut work []f64, lwork int) { +pub fn dsyev(jobz EigenVectorsJob, uplo blas.Uplo, n int, mut a []f64, lda int, mut w []f64, mut work []f64, lwork int) { if jobz != .ev_none && jobz != .ev_compute { panic(bad_ev_job) } @@ -89,7 +89,8 @@ pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, mut w []f6 } } else { dorgtr(uplo, n, mut a, lda, work[indtau..], mut work[indwork..], llwork) - if !dsteqr(EVComp(jobz), n, mut w, mut work[inde..], mut a, lda, mut work[indtau..]) { + if !dsteqr(EigenVectorsComp(jobz), n, mut w, mut work[inde..], mut a, lda, mut + work[indtau..]) { panic('Dsteqr failed') } } diff --git a/lapack/lapack64/errors.v b/lapack/lapack64/errors.v index 642ed523a..27b95002f 100644 --- a/lapack/lapack64/errors.v +++ b/lapack/lapack64/errors.v @@ -7,19 +7,19 @@ pub const bad_apply_ortho = 'lapack: bad 
ApplyOrtho' pub const bad_balance_job = 'lapack: bad BalanceJob' pub const bad_diag = 'lapack: bad Diag' pub const bad_direct = 'lapack: bad Direct' -pub const bad_ev_comp = 'lapack: bad EVComp' -pub const bad_ev_how_many = 'lapack: bad EVHowMany' -pub const bad_ev_job = 'lapack: bad EVJob' -pub const bad_ev_side = 'lapack: bad EVSide' +pub const bad_ev_comp = 'lapack: bad EigenVectorsComp' +pub const bad_ev_how_many = 'lapack: bad EigenVectorsHowMany' +pub const bad_ev_job = 'lapack: bad EigenVectorsJob' +pub const bad_ev_side = 'lapack: bad EigenVectorsSide' pub const bad_gsvd_job = 'lapack: bad GSVDJob' pub const bad_gen_ortho = 'lapack: bad GenOrtho' -pub const bad_left_ev_job = 'lapack: bad LeftEVJob' +pub const bad_left_ev_job = 'lapack: bad LeftEigenVectorsJob' pub const bad_matrix_type = 'lapack: bad MatrixType' pub const bad_maximize_norm_x_job = 'lapack: bad MaximizeNormXJob' pub const bad_norm = 'lapack: bad Norm' pub const bad_ortho_comp = 'lapack: bad OrthoComp' pub const bad_pivot = 'lapack: bad Pivot' -pub const bad_right_ev_job = 'lapack: bad RightEVJob' +pub const bad_right_ev_job = 'lapack: bad RightEigenVectorsJob' pub const bad_svd_job = 'lapack: bad SVDJob' pub const bad_schur_comp = 'lapack: bad SchurComp' pub const bad_schur_job = 'lapack: bad SchurJob' @@ -29,7 +29,7 @@ pub const bad_store_v = 'lapack: bad StoreV' pub const bad_trans = 'lapack: bad Trans' pub const bad_update_schur_comp = 'lapack: bad UpdateSchurComp' pub const bad_uplo = 'lapack: bad Uplo' -pub const both_svd_over = 'lapack: both jobU and jobVT are lapack.SVDOverwrite' +pub const both_svd_over = 'lapack: both jobU and jobVT are lapack.SVDOverwrite' // Panic strings for bad numerical and string values. 
pub const bad_ifst = 'lapack: ifst out of range' diff --git a/lapack/lapack64/ilaenv.v b/lapack/lapack64/ilaenv.v index f9661e1c2..7b1093e90 100644 --- a/lapack/lapack64/ilaenv.v +++ b/lapack/lapack64/ilaenv.v @@ -11,7 +11,7 @@ import math // crossover to an unblocked version. // 4: The number of shifts. // 5: The minimum column dimension for blocking to be used. -// 6: The crossover point for SVD (to use QR factorization or not). +// 6: The crossover point for SVD (to use QR factorization or not). // 7: The number of processors. // 8: The crossover point for multi-shift in QR and QZ methods for non-symmetric eigenvalue problems. // 9: Maximum size of the subproblems in divide-and-conquer algorithms. @@ -241,7 +241,7 @@ fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) i return 2 } 6 { - // Used by xGELSS and xGESVD + // Used by xGELSS and xGESVD // Assuming n1 and n2 are defined elsewhere in your code // Replace `min(n1, n2)` with actual min calculation or function return int(f64(math.min(n1, n2)) * 1.6) diff --git a/lapack/lapack_d_vsl_lapack_lapacke.v b/lapack/lapack_d_vsl_lapack_lapacke.v index bd96129ae..39113c5bc 100644 --- a/lapack/lapack_d_vsl_lapack_lapacke.v +++ b/lapack/lapack_d_vsl_lapack_lapacke.v @@ -13,9 +13,9 @@ fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipi fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, uplo blas.Uplo, n int, a &f64, lda int) int -fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl LeftEVJob, calc_vr LeftEVJob, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int +fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl LeftEigenVectorsJob, calc_vr LeftEigenVectorsJob, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int -fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz EVJob, uplo blas.Uplo, n int, a &f64, lda int, w &f64, work &f64, 
lwork int) int +fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz EigenVectorsJob, uplo blas.Uplo, n int, a &f64, lda int, w &f64, work &f64, lwork int) int fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job BalanceJob, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int @@ -54,13 +54,13 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, } } -// dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. +// dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. // // See: http://www.netlib.org/lapack/explore-html/d8/d2d/dgesvd_8f.html // // See: https://software.intel.com/en-us/mkl-developer-reference-c-gesvd // -// The SVD is written +// The SVD is written // // A = U * SIGMA * transpose(V) // @@ -173,7 +173,7 @@ pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { // // The computed eigenvectors are normalized to have Euclidean norm // equal to 1 and largest component real. 
-pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { +pub fn dgeev(calc_vl LeftEigenVectorsJob, calc_vr LeftEigenVectorsJob, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { mut vvl := 0.0 mut vvr := 0.0 mut ldvl := ldvl_ diff --git a/lapack/lapack_notd_vsl_lapack_lapacke.v b/lapack/lapack_notd_vsl_lapack_lapacke.v index e46de9120..5dc5bb6c0 100644 --- a/lapack/lapack_notd_vsl_lapack_lapacke.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -31,13 +31,13 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, lapack64.dgesv(n, nrhs, mut a, lda, mut ipiv, mut b, ldb) } -// dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. +// dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. 
// // See: http://www.netlib.org/lapack/explore-html/d8/d2d/dgesvd_8f.html // // See: https://software.intel.com/en-us/mkl-developer-reference-c-gesvd // -// The SVD is written +// The SVD is written // // A = U * SIGMA * transpose(V) // @@ -55,7 +55,7 @@ pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s [ info := lapack64.dgesvd(jobu, jobvt, m, n, mut a, lda, s, mut u, ldu, mut vt, ldvt, superb) if info != 0 { - errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: ${info}', .efailed) } } @@ -90,7 +90,7 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { info := lapack64.dgetri(n, mut a, lda, mut ipiv) if info != 0 { - errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: ${info}', .efailed) } } @@ -114,7 +114,7 @@ pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { info := lapack64.dpotrf(uplo, n, mut a, lda) if info != 0 { - errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: ${info}', .efailed) } } @@ -141,7 +141,7 @@ pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { // // The computed eigenvectors are normalized to have Euclidean norm // equal to 1 and largest component real. 
-pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl_ int, mut vr []f64, ldvr_ int) { +pub fn dgeev(calc_vl LeftEigenVectorsJob, calc_vr LeftEigenVectorsJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl_ int, mut vr []f64, ldvr_ int) { mut vvl := 0.0 mut vvr := 0.0 mut ldvl := ldvl_ @@ -163,6 +163,6 @@ pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, info := lapack64.dgeev(calc_vl, calc_vr, n, mut a, lda, wr, wi, mut vl, ldvl, mut vr, ldvr) if info != 0 { - errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: ${info}', .efailed) } } From 8afd14a075983fcfefa7e7f7444a5c69c6f82dd8 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 14:38:16 -0300 Subject: [PATCH 32/33] refactor: Update dpotrf function to use named constant for uplo parameter --- lapack/lapack64/dpotf2.v | 71 ++++++++++++++++++++++++++++++++++++++++ lapack/lapack64/dpotrf.v | 69 ++++++++++++++++++++++++++++---------- 2 files changed, 122 insertions(+), 18 deletions(-) create mode 100644 lapack/lapack64/dpotf2.v diff --git a/lapack/lapack64/dpotf2.v b/lapack/lapack64/dpotf2.v new file mode 100644 index 000000000..22640a366 --- /dev/null +++ b/lapack/lapack64/dpotf2.v @@ -0,0 +1,71 @@ +module lapack64 + +import math +import vsl.blas + +// dpotf2 computes the Cholesky decomposition of the symmetric positive definite +// matrix a. If ul == .upper, then a is stored as an upper-triangular matrix, +// and a = Uᵀ U is stored in place into a. If ul == .lower, then a = L Lᵀ +// is computed and stored in-place into a. If a is not positive definite, false +// is returned. This is the unblocked version of the algorithm. +// +// dpotf2 is an internal routine. It is exported for testing purposes. 
+pub fn dpotf2(ul blas.Uplo, n int, mut a []f64, lda int) bool { + if ul != .upper && ul != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + // Quick return if possible. + if n == 0 { + return true + } + + if a.len < (n - 1) * lda + n { + panic(short_a) + } + + if ul == .upper { + for j := 0; j < n; j++ { + mut ajj := a[j * lda + j] + if j != 0 { + ajj -= blas.ddot(j, a[j..], lda, a[j..], lda) + } + if ajj <= 0 || math.is_nan(ajj) { + a[j * lda + j] = ajj + return false + } + ajj = math.sqrt(ajj) + a[j * lda + j] = ajj + if j < n - 1 { + blas.dgemv(.trans, j, n - j - 1, -1, a[j + 1..], lda, a[j..], lda, 1, mut + a[j * lda + j + 1..], 1) + blas.dscal(n - j - 1, 1 / ajj, mut a[j * lda + j + 1..], 1) + } + } + return true + } + for j := 0; j < n; j++ { + mut ajj := a[j * lda + j] + if j != 0 { + ajj -= blas.ddot(j, a[j * lda..], 1, a[j * lda..], 1) + } + if ajj <= 0 || math.is_nan(ajj) { + a[j * lda + j] = ajj + return false + } + ajj = math.sqrt(ajj) + a[j * lda + j] = ajj + if j < n - 1 { + blas.dgemv(.no_trans, n - j - 1, j, -1, a[(j + 1) * lda..], lda, a[j * lda..], + 1, 1, mut a[(j + 1) * lda + j..], lda) + blas.dscal(n - j - 1, 1 / ajj, mut a[(j + 1) * lda + j..], lda) + } + } + return true +} diff --git a/lapack/lapack64/dpotrf.v b/lapack/lapack64/dpotrf.v index b4d307809..d8e1c6e45 100644 --- a/lapack/lapack64/dpotrf.v +++ b/lapack/lapack64/dpotrf.v @@ -3,31 +3,64 @@ module lapack64 import math import vsl.blas -// dpotrf computes the Cholesky factorization of a real symmetric positive definite matrix A. -pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) int { +pub fn dpotrf(ul blas.Uplo, n int, mut a []f64, lda int) bool { + if ul != .upper && ul != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + // Quick return if possible. 
if n == 0 { - return 0 + return true } - mut info := 0 - if uplo != .upper && uplo != .lower { - info = -1 - } else if n < 0 { - info = -2 - } else if lda < math.max(1, n) { - info = -4 + if a.len < (n - 1) * lda + n { + panic(short_a) } - if info != 0 { - return info + nb := ilaenv(1, 'DPOTRF', ul.str(), n, -1, -1, -1) + if nb <= 1 || n <= nb { + return dpotf2(ul, n, mut a, lda) } - // Quick return if possible - if n == 0 { - return 0 + if ul == .upper { + for j := 0; j < n; j += nb { + jb := math.min(nb, n - j) + blas.dsyrk(.upper, .trans, jb, j, -1, a[j..], lda, 1, mut a[j * lda + j..], + lda) + ok := dpotf2(.upper, jb, mut a[j * lda + j..], lda) + if !ok { + return false + } + if j + jb < n { + blas.dgemm(.trans, .no_trans, jb, n - j - jb, j, -1, a[j..], lda, a[j + jb..], + lda, 1, mut a[j * lda + j + jb..], lda) + blas.dtrsm(.left, .upper, .trans, .non_unit, jb, n - j - jb, 1, a[j * lda + j..], + lda, mut a[j * lda + j + jb..], lda) + } + } + return true } - // Placeholder for the actual LAPACK function calls - // Example: info = dpotrf(uplo, n, a, lda, work, lwork) - return info + for j := 0; j < n; j += nb { + jb := math.min(nb, n - j) + blas.dsyrk(.lower, .no_trans, jb, j, -1, a[j * lda..], lda, 1, mut a[j * lda + j..], + lda) + ok := dpotf2(.lower, jb, mut a[j * lda + j..], lda) + if !ok { + return false + } + if j + jb < n { + blas.dgemm(.no_trans, .trans, n - j - jb, jb, j, -1, a[(j + jb) * lda..], + lda, a[j * lda..], lda, 1, mut a[(j + jb) * lda + j..], lda) + blas.dtrsm(.right, .lower, .trans, .non_unit, n - j - jb, jb, 1, a[j * lda + j..], + lda, mut a[(j + jb) * lda + j..], lda) + } + } + return true } From 3af89bf4fa1a16684646f7da6c36100cb8a3c313 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 14:49:16 -0300 Subject: [PATCH 33/33] refactor: Update BLAS and LAPACK functions to use named constants and handle errors --- blas/oblas_notd_vsl_blas_cblas.v | 87 ++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff 
--git a/blas/oblas_notd_vsl_blas_cblas.v b/blas/oblas_notd_vsl_blas_cblas.v index f05117128..53602851b 100644 --- a/blas/oblas_notd_vsl_blas_cblas.v +++ b/blas/oblas_notd_vsl_blas_cblas.v @@ -6,77 +6,164 @@ import vsl.blas.blas64 @[inline] pub fn set_num_threads(n int) {} +// ddot computes the dot product of two vectors. @[inline] pub fn ddot(n int, x []f64, incx int, y []f64, incy int) f64 { return blas64.ddot(n, x, incx, y, incy) } +// dasum computes the sum of the absolute values of elements in a vector. @[inline] pub fn dasum(n int, x []f64, incx int) f64 { return blas64.dasum(n, x, incx) } +// dnrm2 computes the Euclidean norm of a vector. @[inline] pub fn dnrm2(n int, x []f64, incx int) f64 { return blas64.dnrm2(n, x, incx) } +// daxpy computes y := alpha * x + y. @[inline] pub fn daxpy(n int, alpha f64, x []f64, incx int, mut y []f64, incy int) { blas64.daxpy(n, alpha, x, incx, mut y, incy) } +// dcopy copies a vector x to a vector y. @[inline] pub fn dcopy(n int, x []f64, incx int, mut y []f64, incy int) { blas64.dcopy(n, x, incx, mut y, incy) } +// dswap swaps the elements of two vectors. @[inline] pub fn dswap(n int, mut x []f64, incx int, mut y []f64, incy int) { blas64.dswap(n, mut x, incx, mut y, incy) } +// drot applies a plane rotation to points in the plane. @[inline] pub fn drot(n int, mut x []f64, incx int, mut y []f64, incy int, c f64, s f64) { blas64.drot(n, mut x, incx, mut y, incy, c, s) } +// dscal scales a vector by a constant. @[inline] pub fn dscal(n int, alpha f64, mut x []f64, incx int) { blas64.dscal(n, alpha, mut x, incx) } +// idamax finds the index of the element with the maximum absolute value. +@[inline] +pub fn idamax(n int, x []f64, incx int) int { + return blas64.idamax(n, x, incx) +} + +// dgemv performs matrix-vector multiplication. 
@[inline] pub fn dgemv(trans Transpose, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { blas64.dgemv(trans, m, n, alpha, a, lda, x, incx, beta, mut y, incy) } +// dger performs the rank-1 update of a matrix. @[inline] pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { blas64.dger(m, n, alpha, x, incx, y, incy, mut a, lda) } +// dtrsv solves a system of linear equations with a triangular matrix. @[inline] pub fn dtrsv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { blas64.dtrsv(uplo, trans_a, diag, n, a, lda, mut x, incx) } +// dtrmv performs matrix-vector operations using a triangular matrix. @[inline] pub fn dtrmv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { blas64.dtrmv(uplo, trans_a, diag, n, a, lda, mut x, incx) } +// dsyr performs a symmetric rank-1 update of a matrix. @[inline] pub fn dsyr(uplo Uplo, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { blas64.dsyr(uplo, n, alpha, x, incx, mut a, lda) } +// dsyr2 performs a symmetric rank-2 update of a matrix. @[inline] pub fn dsyr2(uplo Uplo, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { blas64.dsyr2(uplo, n, alpha, x, incx, y, incy, mut a, lda) } +// dgemm performs matrix-matrix multiplication. @[inline] pub fn dgemm(trans_a Transpose, trans_b Transpose, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { blas64.dgemm(trans_a, trans_b, m, n, k, alpha, a, lda, b, ldb, beta, mut cc, ldc) } + +// dgbmv performs a matrix-vector multiplication with a band matrix. 
+@[inline] +pub fn dgbmv(trans_a Transpose, m int, n int, kl int, ku int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + blas64.dgbmv(trans_a, m, n, kl, ku, alpha, a, lda, x, incx, beta, mut y, incy) +} + +// dsymv performs a matrix-vector multiplication for a symmetric matrix. +@[inline] +pub fn dsymv(uplo Uplo, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + blas64.dsymv(uplo, n, alpha, a, lda, x, incx, beta, mut y, incy) +} + +// dsbmv performs a matrix-vector multiplication with a symmetric band matrix. +@[inline] +pub fn dsbmv(uplo Uplo, n int, k int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + blas64.dsbmv(uplo, n, k, alpha, a, lda, x, incx, beta, mut y, incy) +} + +// dtbmv performs a matrix-vector multiplication with a triangular band matrix. +@[inline] +pub fn dtbmv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtbmv(uplo, trans_a, diag, n, k, a, lda, mut x, incx) +} + +// dtbsv solves a system of linear equations with a triangular band matrix. +@[inline] +pub fn dtbsv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtbsv(uplo, trans_a, diag, n, k, a, lda, mut x, incx) +} + +// dtpmv performs a matrix-vector multiplication with a triangular packed matrix. +@[inline] +pub fn dtpmv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + blas64.dtpmv(uplo, trans_a, diag, n, ap, mut x, incx) +} + +// dtpsv solves a system of linear equations with a triangular packed matrix. +@[inline] +pub fn dtpsv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + blas64.dtpsv(uplo, trans_a, diag, n, ap, mut x, incx) +} + +// dspmv performs a matrix-vector multiplication with a symmetric packed matrix. 
+@[inline] +pub fn dspmv(uplo Uplo, n int, alpha f64, ap []f64, x []f64, incx int, beta f64, mut y []f64, incy int) { + blas64.dspmv(uplo, n, alpha, ap, x, incx, beta, mut y, incy) +} + +// dspr performs a symmetric rank-1 update for a packed matrix. +@[inline] +pub fn dspr(uplo Uplo, n int, alpha f64, x []f64, incx int, mut ap []f64) { + blas64.dspr(uplo, n, alpha, x, incx, mut ap) +} + +// dspr2 performs a symmetric rank-2 update for a packed matrix. +@[inline] +pub fn dspr2(uplo Uplo, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut ap []f64) { + blas64.dspr2(uplo, n, alpha, x, incx, y, incy, mut ap) +} + +// dsyrk performs a symmetric rank-k update. +@[inline] +pub fn dsyrk(uplo Uplo, trans_a Transpose, n int, k int, alpha f64, a []f64, lda int, beta f64, mut c []f64, ldc int) { + blas64.dsyrk(uplo, trans_a, n, k, alpha, a, lda, beta, mut c, ldc) +}