From 6f4fe3597916f6df03a05af6b45c55f1ad1824f0 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 02:06:32 -0300 Subject: [PATCH 01/33] Refactor linear algebra module and remove unused code --- bin/test | 2 +- blas/README.md | 66 +++ .../blas => blas/blas64}/conversions.v | 2 +- {vlas/internal/blas => blas/blas64}/dgemm.v | 2 +- {vlas/internal/blas => blas/blas64}/dgemv.v | 2 +- .../blas => blas/blas64}/dgemv_test.v | 2 +- {vlas/internal/blas => blas/blas64}/error.v | 3 +- .../internal/blas => blas/blas64}/level1f64.v | 2 +- .../blas => blas/blas64}/level1f64_ddot.v | 2 +- .../internal/blas => blas/blas64}/level2f64.v | 2 +- .../internal/blas => blas/blas64}/level3f64.v | 2 +- {vlas/internal/blas => blas/blas64}/util.v | 2 +- {vlas => blas}/cblas.h | 0 .../cflags_d_vsl_blas_cblas.v | 6 +- {vlas => blas}/conversions.v | 8 +- blas/oblas_d_vsl_blas_cblas.v | 448 ++++++++++++++++++ .../oblas_notd_vsl_blas_cblas.v | 41 +- {vlas => blas}/openblas_config.h | 0 blas/v.mod | 8 + la/blas.v | 32 +- la/densesol.v | 6 +- la/matrix_ops.v | 10 +- lapack/README.md | 58 +++ .../cflags_d_vsl_lapack_lapacke copy.v | 3 +- lapack/cflags_notd_vsl_lapack_lapacke.v | 14 + lapack/lapack64/dgesv.v | 56 +++ lapack/lapack64/dgetrf.v | 51 ++ lapack/lapack64/errors.v | 178 +++++++ {vlas => lapack}/lapack_common.v | 27 +- {vlas => lapack}/lapack_default.c.v | 6 +- {vlas => lapack}/lapack_macos.c.v | 2 +- {vlas => lapack}/v.mod | 4 +- vlas/README.md | 83 ---- vlas/oblas_d_vsl_vlas_cblas.v | 448 ------------------ 34 files changed, 963 insertions(+), 615 deletions(-) create mode 100644 blas/README.md rename {vlas/internal/blas => blas/blas64}/conversions.v (95%) rename {vlas/internal/blas => blas/blas64}/dgemm.v (99%) rename {vlas/internal/blas => blas/blas64}/dgemv.v (99%) rename {vlas/internal/blas => blas/blas64}/dgemv_test.v (99%) rename {vlas/internal/blas => blas/blas64}/error.v (98%) rename {vlas/internal/blas => blas/blas64}/level1f64.v (99%) rename {vlas/internal/blas => 
blas/blas64}/level1f64_ddot.v (98%) rename {vlas/internal/blas => blas/blas64}/level2f64.v (99%) rename {vlas/internal/blas => blas/blas64}/level3f64.v (99%) rename {vlas/internal/blas => blas/blas64}/util.v (97%) rename {vlas => blas}/cblas.h (100%) rename vlas/cflags_d_vsl_vlas_cblas.v => blas/cflags_d_vsl_blas_cblas.v (73%) rename {vlas => blas}/conversions.v (98%) create mode 100644 blas/oblas_d_vsl_blas_cblas.v rename vlas/oblas_notd_vsl_vlas_cblas.v => blas/oblas_notd_vsl_blas_cblas.v (53%) rename {vlas => blas}/openblas_config.h (100%) create mode 100644 blas/v.mod create mode 100644 lapack/README.md rename vlas/cflags_notd_vsl_vlas_cblas.v => lapack/cflags_d_vsl_lapack_lapacke copy.v (72%) create mode 100644 lapack/cflags_notd_vsl_lapack_lapacke.v create mode 100644 lapack/lapack64/dgesv.v create mode 100644 lapack/lapack64/dgetrf.v create mode 100644 lapack/lapack64/errors.v rename {vlas => lapack}/lapack_common.v (79%) rename {vlas => lapack}/lapack_default.c.v (54%) rename {vlas => lapack}/lapack_macos.c.v (94%) rename {vlas => lapack}/v.mod (63%) delete mode 100644 vlas/README.md delete mode 100644 vlas/oblas_d_vsl_vlas_cblas.v diff --git a/bin/test b/bin/test index 07dbf064a..d4e1878ed 100755 --- a/bin/test +++ b/bin/test @@ -28,7 +28,7 @@ flags="" if [[ -n "${use_cblas}" ]]; then echo "Running tests using Open BLAS" - flags="${flags} -d vsl_vlas_cblas" + flags="${flags} -d vsl_blas_cblas" fi if [[ -n "${use_autofree}" ]]; then diff --git a/blas/README.md b/blas/README.md new file mode 100644 index 000000000..ac8c441d8 --- /dev/null +++ b/blas/README.md @@ -0,0 +1,66 @@ +# The V Basic Linear Algebra System + +This package implements Basic Linear Algebra System (BLAS) routines in V. 
+ +| Backend | Description | Status | Compilation Flags | +| -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ | ------------------- | +| BLAS | Pure V implementation | Stable | `NONE` | +| OpenBLAS | OpenBLAS is an optimized BLAS library based on GotoBLAS2. Check the section [OpenBLAS Backend](#openblas-backend) for more information. | Stable | `-d vsl_blas_cblas` | + +Therefore, its routines are a little more _low-level_ than the ones in the package `vsl.la`. + +## OpenBLAS Backend + +We provide a backend for the OpenBLAS library. This backend is probably the fastest one for all platforms +but it requires the installation of the OpenBLAS library. + +Use the compilation flag `-d vsl_blas_cblas` to use the OpenBLAS backend +instead of the pure V implementation +and make sure that the OpenBLAS library is installed in your system. + +Check the section below for more information about installing the OpenBLAS library. + +<details>
+Install dependencies + +### Homebrew (macOS) + +```sh +brew install openblas +``` + +### Debian/Ubuntu GNU Linux + +`libopenblas-dev` is not needed when using the pure V backend. + +```sh +sudo apt-get install -y --no-install-recommends \ + gcc \ + gfortran \ + libopenblas-dev +``` + +### Arch Linux/Manjaro GNU Linux + +The best way of installing OpenBlas is using +[lapack-openblas](https://aur.archlinux.org/packages/lapack-openblas/). + +```sh +yay -S lapack-openblas +``` + +or + +```sh +git clone https://aur.archlinux.org/lapack-openblas.git /tmp/lapack-openblas +cd /tmp/lapack-openblas +makepkg -si +``` + +### macOS + +```sh +brew install openblas +``` + +
diff --git a/vlas/internal/blas/conversions.v b/blas/blas64/conversions.v similarity index 95% rename from vlas/internal/blas/conversions.v rename to blas/blas64/conversions.v index 2b4d0d1cc..591e7a2e8 100644 --- a/vlas/internal/blas/conversions.v +++ b/blas/blas64/conversions.v @@ -1,4 +1,4 @@ -module blas +module blas64 pub enum MemoryLayout { row_major = 101 diff --git a/vlas/internal/blas/dgemm.v b/blas/blas64/dgemm.v similarity index 99% rename from vlas/internal/blas/dgemm.v rename to blas/blas64/dgemm.v index 1af9d1337..f7bcfeb5e 100644 --- a/vlas/internal/blas/dgemm.v +++ b/blas/blas64/dgemm.v @@ -1,4 +1,4 @@ -module blas +module blas64 // import runtime import sync diff --git a/vlas/internal/blas/dgemv.v b/blas/blas64/dgemv.v similarity index 99% rename from vlas/internal/blas/dgemv.v rename to blas/blas64/dgemv.v index 94705f79c..5c2510387 100644 --- a/vlas/internal/blas/dgemv.v +++ b/blas/blas64/dgemv.v @@ -1,4 +1,4 @@ -module blas +module blas64 import vsl.float.float64 import math diff --git a/vlas/internal/blas/dgemv_test.v b/blas/blas64/dgemv_test.v similarity index 99% rename from vlas/internal/blas/dgemv_test.v rename to blas/blas64/dgemv_test.v index 4273ea257..3964eddd8 100644 --- a/vlas/internal/blas/dgemv_test.v +++ b/blas/blas64/dgemv_test.v @@ -1,4 +1,4 @@ -module blas +module blas64 fn test_dgemv_no_trans_1() { expected := [0.0, 0, 0, 0, 0] diff --git a/vlas/internal/blas/error.v b/blas/blas64/error.v similarity index 98% rename from vlas/internal/blas/error.v rename to blas/blas64/error.v index 48e99b597..70e5dddda 100644 --- a/vlas/internal/blas/error.v +++ b/blas/blas64/error.v @@ -1,7 +1,8 @@ -module blas +module blas64 // Panic strings used during parameter checks. // This list is duplicated in netlib/blas/netlib. Keep in sync. 
+ pub const zero_incx = 'blas: zero x index increment' pub const zero_incy = 'blas: zero y index increment' diff --git a/vlas/internal/blas/level1f64.v b/blas/blas64/level1f64.v similarity index 99% rename from vlas/internal/blas/level1f64.v rename to blas/blas64/level1f64.v index d42de1895..4d506cee0 100644 --- a/vlas/internal/blas/level1f64.v +++ b/blas/blas64/level1f64.v @@ -1,4 +1,4 @@ -module blas +module blas64 import vsl.float.float64 import math diff --git a/vlas/internal/blas/level1f64_ddot.v b/blas/blas64/level1f64_ddot.v similarity index 98% rename from vlas/internal/blas/level1f64_ddot.v rename to blas/blas64/level1f64_ddot.v index 2f413b178..3fd1310da 100644 --- a/vlas/internal/blas/level1f64_ddot.v +++ b/blas/blas64/level1f64_ddot.v @@ -1,4 +1,4 @@ -module blas +module blas64 import vsl.float.float64 diff --git a/vlas/internal/blas/level2f64.v b/blas/blas64/level2f64.v similarity index 99% rename from vlas/internal/blas/level2f64.v rename to blas/blas64/level2f64.v index 9058f13d6..bee249f55 100644 --- a/vlas/internal/blas/level2f64.v +++ b/blas/blas64/level2f64.v @@ -1,4 +1,4 @@ -module blas +module blas64 import math import vsl.float.float64 diff --git a/vlas/internal/blas/level3f64.v b/blas/blas64/level3f64.v similarity index 99% rename from vlas/internal/blas/level3f64.v rename to blas/blas64/level3f64.v index 0ea444976..e1af75000 100644 --- a/vlas/internal/blas/level3f64.v +++ b/blas/blas64/level3f64.v @@ -1,4 +1,4 @@ -module blas +module blas64 import vsl.float.float64 import math diff --git a/vlas/internal/blas/util.v b/blas/blas64/util.v similarity index 97% rename from vlas/internal/blas/util.v rename to blas/blas64/util.v index 28e0e9a78..41837e456 100644 --- a/vlas/internal/blas/util.v +++ b/blas/blas64/util.v @@ -1,4 +1,4 @@ -module blas +module blas64 // [SD]gemm behavior constants. These are kept here to keep them out of the // way during single precision code genration. 
diff --git a/vlas/cblas.h b/blas/cblas.h similarity index 100% rename from vlas/cblas.h rename to blas/cblas.h diff --git a/vlas/cflags_d_vsl_vlas_cblas.v b/blas/cflags_d_vsl_blas_cblas.v similarity index 73% rename from vlas/cflags_d_vsl_vlas_cblas.v rename to blas/cflags_d_vsl_blas_cblas.v index 0c3432256..6d038411c 100644 --- a/vlas/cflags_d_vsl_vlas_cblas.v +++ b/blas/cflags_d_vsl_blas_cblas.v @@ -1,4 +1,4 @@ -module vlas +module blas #flag linux -O2 -I/usr/local/include -I/usr/lib #flag linux -L/usr/local/lib -L/usr/lib @@ -7,11 +7,9 @@ module vlas // Intel, M1 brew, and MacPorts #flag darwin -I/usr/local/opt/openblas/include -I/opt/homebrew/opt/openblas/include -I/opt/local/opt/openblas/include #flag darwin -L/usr/local/opt/openblas/lib -L/opt/homebrew/opt/openblas/lib -L/opt/local/opt/openblas/lib -#flag darwin -L/usr/local/opt/lapack/lib -L/opt/homebrew/opt/lapack/lib -L/opt/local/opt/lapack/lib #flag -I@VMODROOT -#flag -lopenblas -llapacke +#flag -lopenblas $if macos { - #include <lapacke.h> #include <cblas.h> } diff --git a/vlas/conversions.v b/blas/conversions.v similarity index 98% rename from vlas/conversions.v rename to blas/conversions.v index c3a097d9f..154edade5 100644 --- a/vlas/conversions.v +++ b/blas/conversions.v @@ -1,16 +1,16 @@ -module vlas +module blas import strconv import math import math.complex import vsl.errors -import vsl.vlas.internal.blas +import vsl.blas.blas64 -pub fn c_trans(trans bool) blas.Transpose { +pub fn c_trans(trans bool) blas64.Transpose { return if trans { .trans } else { .no_trans } } -pub fn c_uplo(up bool) blas.Uplo { +pub fn c_uplo(up bool) blas64.Uplo { return if up { .upper } else { .lower } } diff --git a/blas/oblas_d_vsl_blas_cblas.v b/blas/oblas_d_vsl_blas_cblas.v new file mode 100644 index 000000000..b4483c995 --- /dev/null +++ b/blas/oblas_d_vsl_blas_cblas.v @@ -0,0 +1,448 @@ +module blas + +import vsl.blas.blas64 + +fn C.openblas_set_num_threads(n int) + +fn C.cblas_sdsdot(n int, alpha f32, x &f32, incx int, y &f32, incy int)
f32 +fn C.cblas_dsdot(n int, x &f32, incx int, y &f32, incy int) f64 +fn C.cblas_sdot(n int, x &f32, incx int, y &f32, incy int) f32 +fn C.cblas_ddot(n int, x &f64, incx int, y &f64, incy int) f64 +fn C.cblas_cdotu(n int, x voidptr, incx int, y voidptr, incy int) f32 +fn C.cblas_cdotc(n int, x voidptr, incx int, y voidptr, incy int) f32 +fn C.cblas_zdotu(n int, x voidptr, incx int, y voidptr, incy int) f64 +fn C.cblas_zdotc(n int, x voidptr, incx int, y voidptr, incy int) f64 +fn C.cblas_cdotu_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) +fn C.cblas_cdotc_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) +fn C.cblas_zdotu_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) +fn C.cblas_zdotc_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) +fn C.cblas_sasum(n int, x &f32, incx int) f32 +fn C.cblas_dasum(n int, x &f64, incx int) f64 +fn C.cblas_scasum(n int, x voidptr, incx int) f32 +fn C.cblas_dzasum(n int, x voidptr, incx int) f64 +fn C.cblas_ssum(n int, x &f32, incx int) f32 +fn C.cblas_dsum(n int, x &f64, incx int) f64 +fn C.cblas_scsum(n int, x voidptr, incx int) f32 +fn C.cblas_dzsum(n int, x voidptr, incx int) f64 +fn C.cblas_snrm2(n int, x &f32, incx int) f32 +fn C.cblas_dnrm2(n int, x &f64, incx int) f64 +fn C.cblas_scnrm2(n int, x voidptr, incx int) f32 +fn C.cblas_dznrm2(n int, x voidptr, incx int) f64 + +fn C.cblas_isamax(n int, x &f32, incx int) int +fn C.cblas_idamax(n int, x &f64, incx int) int +fn C.cblas_icamax(n int, x voidptr, incx int) int +fn C.cblas_izamax(n int, x voidptr, incx int) int +fn C.cblas_isamin(n int, x &f32, incx int) int +fn C.cblas_idamin(n int, x &f64, incx int) int +fn C.cblas_icamin(n int, x voidptr, incx int) int +fn C.cblas_izamin(n int, x voidptr, incx int) int +fn C.cblas_ismax(n int, x &f32, incx int) int +fn C.cblas_idmax(n int, x &f64, incx int) int +fn C.cblas_icmax(n int, x voidptr, incx int) int +fn C.cblas_izmax(n int, x voidptr, incx int) int +fn 
C.cblas_ismin(n int, x &f32, incx int) int +fn C.cblas_idmin(n int, x &f64, incx int) int +fn C.cblas_icmin(n int, x voidptr, incx int) int +fn C.cblas_izmin(n int, x voidptr, incx int) int +fn C.cblas_saxpy(n int, alpha f32, x &f32, incx int, y &f32, incy int) +fn C.cblas_daxpy(n int, alpha f64, x &f64, incx int, y &f64, incy int) +fn C.cblas_caxpy(n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_zaxpy(n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_scopy(n int, x &f32, incx int, y &f32, incy int) +fn C.cblas_dcopy(n int, x &f64, incx int, y &f64, incy int) +fn C.cblas_ccopy(n int, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_zcopy(n int, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_sswap(n int, x &f32, incx int, y &f32, incy int) +fn C.cblas_dswap(n int, x &f64, incx int, y &f64, incy int) +fn C.cblas_cswap(n int, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_zswap(n int, x voidptr, incx int, y voidptr, incy int) +fn C.cblas_srot(n int, x &f32, incx int, y &f32, incy int, c f32, s f32) +fn C.cblas_drot(n int, x &f64, incx int, y &f64, incy int, c f64, s f64) +fn C.cblas_srotg(a &f32, b &f32, c &f32, s &f32) +fn C.cblas_drotg(a &f64, b &f64, c &f64, s &f64) +fn C.cblas_srotm(n int, x &f32, incx int, y &f32, incy int, p &f32) +fn C.cblas_drotm(n int, x &f64, incx int, y &f64, incy int, p &f64) +fn C.cblas_srotmg(d1 &f32, d2 &f32, b1 &f32, b2 f32, p &f32) +fn C.cblas_drotmg(d1 &f64, d2 &f64, b1 &f64, b2 f64, p &f64) +fn C.cblas_sscal(n int, alpha f32, x &f32, incx int) +fn C.cblas_dscal(n int, alpha f64, x &f64, incx int) +fn C.cblas_cscal(n int, alpha voidptr, x voidptr, incx int) +fn C.cblas_zscal(n int, alpha voidptr, x voidptr, incx int) +fn C.cblas_csscal(n int, alpha f32, x voidptr, incx int) +fn C.cblas_zdscal(n int, alpha f64, x voidptr, incx int) +fn C.cblas_sgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta 
f32, y &f32, incy int) +fn C.cblas_dgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_cgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sger(order blas64.MemoryLayout, m int, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) +fn C.cblas_dger(order blas64.MemoryLayout, m int, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) +fn C.cblas_cgeru(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_cgerc(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zgeru(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zgerc(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_strsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_strmv(order blas64.MemoryLayout, uplo blas64.Uplo, 
transA blas64.Transpose, diag blas64.Diagonal, n int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ssyr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, a &f32, lda int) +fn C.cblas_dsyr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, a &f64, lda int) +fn C.cblas_cher(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr, lda int) +fn C.cblas_zher(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr, lda int) +fn C.cblas_ssyr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) +fn C.cblas_dsyr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) +fn C.cblas_cher2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zher2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_sgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_cgbmv(order blas64.MemoryLayout, transA 
blas64.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_ssbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dsbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_stbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_stbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_stpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA 
blas64.Transpose, diag blas64.Diagonal, n int, ap &f32, x &f32, incx int) +fn C.cblas_dtpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f64, x &f64, incx int) +fn C.cblas_ctpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_ztpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_stpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f32, x &f32, incx int) +fn C.cblas_dtpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f64, x &f64, incx int) +fn C.cblas_ctpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_ztpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_ssymv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dsymv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_chemv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhemv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sspmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, ap &f32, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dspmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, ap &f64, x &f64, incx int, beta f64, y &f64, incy int) +fn 
C.cblas_sspr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, ap &f32) +fn C.cblas_dspr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, ap &f64) +fn C.cblas_chpr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr) +fn C.cblas_zhpr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr) +fn C.cblas_sspr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32) +fn C.cblas_dspr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64) +fn C.cblas_chpr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) +fn C.cblas_zhpr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) +fn C.cblas_chbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_chpmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhpmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn 
C.cblas_cgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_cgemm3m(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zgemm3m(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dsymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn C.cblas_csymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zsymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a &f32, lda int, beta f32, c &f32, ldc int) +fn C.cblas_dsyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f64, a &f64, lda int, beta f64, c &f64, ldc int) +fn C.cblas_csyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc 
int) +fn C.cblas_zsyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dsyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn C.cblas_csyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zsyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_strmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) +fn C.cblas_dtrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) +fn C.cblas_ctrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_ztrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_strsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) +fn C.cblas_dtrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n 
int, alpha f64, a &f64, lda int, b &f64, ldb int) +fn C.cblas_ctrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_ztrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_chemm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zhemm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_cherk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a voidptr, lda int, beta f32, c voidptr, ldc int) +fn C.cblas_zherk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f64, a voidptr, lda int, beta f64, c voidptr, ldc int) +fn C.cblas_cher2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f32, c voidptr, ldc int) +fn C.cblas_zher2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f64, c voidptr, ldc int) +fn C.cblas_xerbla(p int, rout &byte, form &byte, other voidptr) + +fn C.cblas_saxpby(n int, alpha f32, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_daxpby(n int, alpha f64, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_caxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zaxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_somatcopy(corder blas64.MemoryLayout, ctrans 
blas64.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, b &f32, cldb int) +fn C.cblas_domatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f64, a &f64, clda int, b &f64, cldb int) +fn C.cblas_comatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, b &f32, cldb int) +fn C.cblas_zomatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, b &f64, cldb int) +fn C.cblas_simatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, cldb int) +fn C.cblas_dimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f64, a &f64, clda int, cldb int) +fn C.cblas_cimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, cldb int) +fn C.cblas_zimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, cldb int) +fn C.cblas_sgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha f32, a &f32, clda int, cbeta f32, c &f32, cldc int) +fn C.cblas_dgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha f64, a &f64, clda int, cbeta f64, c &f64, cldc int) +fn C.cblas_cgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha &f32, a &f32, clda int, cbeta &f32, c &f32, cldc int) +fn C.cblas_zgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha &f64, a &f64, clda int, cbeta &f64, c &f64, cldc int) + +// set_num_threads sets the number of threads in OpenBLAS +pub fn set_num_threads(n int) { + C.openblas_set_num_threads(n) +} + +@[inline] +pub fn sdsdot(n int, alpha f32, x []f32, incx int, y []f32, incy int) f32 { + return C.cblas_sdsdot(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dsdot(n int, x []f32, incx int, y []f32, incy int) f64 { + return C.cblas_dsdot(n, 
unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn sdot(n int, x []f32, incx int, y []f32, incy int) f32 { + return C.cblas_sdot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn ddot(n int, x []f64, incx int, y []f64, incy int) f64 { + return C.cblas_ddot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn sasum(n int, x []f32, incx int) f32 { + return C.cblas_sasum(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dasum(n int, x []f64, incx int) f64 { + return C.cblas_dasum(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ssum(n int, x []f32, incx int) f32 { + return C.cblas_ssum(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dsum(n int, x []f64, incx int) f64 { + return C.cblas_dsum(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn snrm2(n int, x []f32, incx int) f32 { + return C.cblas_snrm2(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dnrm2(n int, x []f64, incx int) f64 { + return C.cblas_dnrm2(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn isamax(n int, x []f32, incx int) int { + return C.cblas_isamax(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn idamax(n int, x []f64, incx int) int { + return C.cblas_idamax(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn isamin(n int, x []f32, incx int) int { + return C.cblas_isamin(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn idamin(n int, x &f64, incx int) int { + return C.cblas_idamin(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ismax(n int, x []f32, incx int) int { + return C.cblas_ismax(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn idmax(n int, x []f64, incx int) int { + return C.cblas_idmax(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ismin(n int, x []f32, incx int) int { + return C.cblas_ismin(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn idmin(n int, x []f64, incx int) int { + return C.cblas_idmin(n, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn saxpy(n int, alpha f32, x []f32, incx int, mut y 
[]f32, incy int) { + C.cblas_saxpy(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn daxpy(n int, alpha f64, x []f64, incx int, mut y []f64, incy int) { + C.cblas_daxpy(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn scopy(n int, mut x []f32, incx int, mut y []f32, incy int) { + C.cblas_scopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dcopy(n int, mut x []f64, incx int, mut y []f64, incy int) { + C.cblas_dcopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn sswap(n int, mut x []f32, incx int, mut y []f32, incy int) { + C.cblas_sswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dswap(n int, mut x []f64, incx int, mut y []f64, incy int) { + C.cblas_dswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn srot(n int, mut x []f32, incx int, mut y []f32, incy int, c f32, s f32) { + C.cblas_srot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, c, s) +} + +@[inline] +pub fn drot(n int, mut x []f64, incx int, mut y []f64, incy int, c f64, s f64) { + C.cblas_drot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, c, s) +} + +@[inline] +pub fn srotg(a f32, b f32, c f32, s f32) { + C.cblas_srotg(&a, &b, &c, &s) +} + +@[inline] +pub fn drotg(a f64, b f64, c f64, s f64) { + C.cblas_drotg(&a, &b, &c, &s) +} + +@[inline] +pub fn srotm(n int, x []f32, incx int, y []f32, incy int, p []f32) { + C.cblas_srotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) +} + +@[inline] +pub fn drotm(n int, x []f64, incx int, y []f64, incy int, p []f64) { + C.cblas_drotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) +} + +@[inline] +pub fn srotmg(d1 f32, d2 f32, b1 f32, b2 f32, p []f32) { + C.cblas_srotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) +} + +@[inline] +pub fn drotmg(d1 f64, d2 f64, b1 f64, b2 f32, p []f64) { + C.cblas_drotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) +} + 
+@[inline] +pub fn sscal(n int, alpha f32, mut x []f32, incx int) { + C.cblas_sscal(n, alpha, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dscal(n int, alpha f64, mut x []f64, incx int) { + C.cblas_dscal(n, alpha, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn sgemv(trans bool, m int, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn sger(m int, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { + C.cblas_sger(.row_major, m, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, + unsafe { &a[0] }, lda) +} + +@[inline] +pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { + C.cblas_dger(.row_major, m, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, + unsafe { &a[0] }, lda) +} + +@[inline] +pub fn strsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dtrsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn strmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] 
+pub fn dtrmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ssyr(uplo bool, n int, alpha f32, x []f32, incx int, mut a []f32, lda int) { + C.cblas_ssyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, + lda) +} + +@[inline] +pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { + C.cblas_dsyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, + lda) +} + +@[inline] +pub fn ssyr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { + C.cblas_ssyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, + incy, unsafe { &a[0] }, lda) +} + +@[inline] +pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { + C.cblas_dsyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, + incy, unsafe { &a[0] }, lda) +} + +@[inline] +pub fn sgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut cc []f32, ldc int) { + C.cblas_sgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) +} + +@[inline] +pub fn dgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { + C.cblas_dgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) +} diff --git a/vlas/oblas_notd_vsl_vlas_cblas.v b/blas/oblas_notd_vsl_blas_cblas.v similarity index 53% rename from vlas/oblas_notd_vsl_vlas_cblas.v rename to blas/oblas_notd_vsl_blas_cblas.v index ae4ff46d9..763995224 100644 --- 
a/vlas/oblas_notd_vsl_vlas_cblas.v +++ b/blas/oblas_notd_vsl_blas_cblas.v @@ -1,82 +1,83 @@ -module vlas +module blas -import vsl.vlas.internal.blas +import vsl.blas.blas64 -// set_num_threads sets the number of threads in VLAS +// set_num_threads sets the number of threads in BLAS +@[inline] pub fn set_num_threads(n int) {} @[inline] pub fn ddot(n int, x []f64, incx int, y []f64, incy int) f64 { - return blas.ddot(n, x, incx, y, incy) + return blas64.ddot(n, x, incx, y, incy) } @[inline] pub fn dasum(n int, x []f64, incx int) f64 { - return blas.dasum(n, x, incx) + return blas64.dasum(n, x, incx) } @[inline] pub fn dnrm2(n int, x []f64, incx int) f64 { - return blas.dnrm2(n, x, incx) + return blas64.dnrm2(n, x, incx) } @[inline] pub fn daxpy(n int, alpha f64, x []f64, incx int, mut y []f64, incy int) { - blas.daxpy(n, alpha, x, incx, mut y, incy) + blas64.daxpy(n, alpha, x, incx, mut y, incy) } @[inline] pub fn dcopy(n int, x []f64, incx int, mut y []f64, incy int) { - blas.dcopy(n, x, incx, mut y, incy) + blas64.dcopy(n, x, incx, mut y, incy) } @[inline] pub fn dswap(n int, mut x []f64, incx int, mut y []f64, incy int) { - blas.dswap(n, mut x, incx, mut y, incy) + blas64.dswap(n, mut x, incx, mut y, incy) } @[inline] pub fn drot(n int, mut x []f64, incx int, mut y []f64, incy int, c f64, s f64) { - blas.drot(n, mut x, incx, mut y, incy, c, s) + blas64.drot(n, mut x, incx, mut y, incy, c, s) } @[inline] pub fn dscal(n int, alpha f64, mut x []f64, incx int) { - blas.dscal(n, alpha, mut x, incx) + blas64.dscal(n, alpha, mut x, incx) } @[inline] pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - blas.dgemv(c_trans(trans), m, n, alpha, a, lda, x, incx, beta, mut y, incy) + blas64.dgemv(c_trans(trans), m, n, alpha, a, lda, x, incx, beta, mut y, incy) } @[inline] pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - blas.dger(m, n, alpha, x, incx, y, 
incy, mut a, lda) + blas64.dger(m, n, alpha, x, incx, y, incy, mut a, lda) } @[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - blas.dtrsv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) +pub fn dtrsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtrsv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) } @[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - blas.dtrmv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) +pub fn dtrmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtrmv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) } @[inline] pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { - blas.dsyr(c_uplo(uplo), n, alpha, x, incx, mut a, lda) + blas64.dsyr(c_uplo(uplo), n, alpha, x, incx, mut a, lda) } @[inline] pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - blas.dsyr2(c_uplo(uplo), n, alpha, x, incx, y, incy, mut a, lda) + blas64.dsyr2(c_uplo(uplo), n, alpha, x, incx, y, incy, mut a, lda) } @[inline] pub fn dgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { - blas.dgemm(c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, a, lda, b, ldb, beta, mut + blas64.dgemm(c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, a, lda, b, ldb, beta, mut cc, ldc) } diff --git a/vlas/openblas_config.h b/blas/openblas_config.h similarity index 100% rename from vlas/openblas_config.h rename to blas/openblas_config.h diff --git a/blas/v.mod b/blas/v.mod new file mode 100644 index 000000000..0e946714d --- /dev/null +++ b/blas/v.mod @@ -0,0 +1,8 @@ +Module { + name: 'blas' + description: 'The V Basic 
Linear Algebra System' + version: '0.1.0' + license: 'MIT' + repo_url: 'https://github.com/vlang/vsl' + dependencies: [] +} diff --git a/la/blas.v b/la/blas.v index 53514a776..630ab49f6 100644 --- a/la/blas.v +++ b/la/blas.v @@ -1,6 +1,6 @@ module la -import vsl.vlas +import vsl.blas import math // TODO: @ulises-jeremias to remove this once https://github.com/vlang/v/issues/14047 is finished @@ -47,7 +47,7 @@ pub fn vector_dot[T](u []T, v []T) T { } return res } - return vlas.ddot(u.len, arr_to_f64arr[T](u), 1, arr_to_f64arr[T](v), 1) + return blas.ddot(u.len, arr_to_f64arr[T](u), 1, arr_to_f64arr[T](v), 1) } $else { mut res := T{} for i in 0 .. u.len { @@ -66,7 +66,7 @@ pub fn vector_add[T](alpha T, u []T, beta T, v []T) []T { cutoff := 150 if beta == 1 && n > cutoff { res = v.clone() - vlas.daxpy(n, alpha, arr_to_f64arr(u), 1, mut res, 1) + blas.daxpy(n, alpha, arr_to_f64arr(u), 1, mut res, 1) return res } m := n % 4 @@ -136,7 +136,7 @@ pub fn matrix_vector_mul[T](alpha T, a &Matrix[T], u []T) []T { } return v } - vlas.dgemv(false, a.m, a.n, alpha, arr_to_f64arr[T](a.data), a.m, arr_to_f64arr[T](u), + blas.dgemv(false, a.m, a.n, alpha, arr_to_f64arr[T](a.data), a.m, arr_to_f64arr[T](u), 1, 0.0, mut v, v.len) return v } $else { @@ -167,7 +167,7 @@ pub fn matrix_tr_vector_mul[T](alpha T, a &Matrix[T], u []T) []T { } return v } - vlas.dgemv(true, a.m, a.n, alpha, arr_to_f64arr[T](a.data), a.n, arr_to_f64arr[T](u), + blas.dgemv(true, a.m, a.n, alpha, arr_to_f64arr[T](a.data), a.n, arr_to_f64arr[T](u), 1, 0.0, mut v, v.len) return v } $else { @@ -199,7 +199,7 @@ pub fn vector_vector_tr_mul[T](alpha T, u []T, v []T) &Matrix[T] { return m } mut a := []f64{len: u.len * v.len} - vlas.dger(m.m, m.n, alpha, arr_to_f64arr[T](u), 1, arr_to_f64arr[T](v), 1, mut + blas.dger(m.m, m.n, alpha, arr_to_f64arr[T](u), 1, arr_to_f64arr[T](v), 1, mut a, int(math.max(m.m, m.n))) return Matrix.raw(u.len, v.len, a) } $else { @@ -220,7 +220,7 @@ pub fn vector_vector_tr_mul[T](alpha T, u 
[]T, v []T) &Matrix[T] { // pub fn matrix_vector_mul_add(alpha f64, a &Matrix[f64], u []f64) []f64 { mut v := []f64{len: a.m} - vlas.dgemv(false, a.m, a.n, alpha, a.data, a.m, u, 1, 1.0, mut v, v.len) + blas.dgemv(false, a.m, a.n, alpha, a.data, a.m, u, 1, 1.0, mut v, v.len) return v } @@ -240,7 +240,7 @@ pub fn matrix_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix } return } - vlas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.m, b.data, b.m, 0.0, mut + blas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.m, b.data, b.m, 0.0, mut c.data, c.m) } @@ -260,7 +260,7 @@ pub fn matrix_tr_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat } return } - vlas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, + blas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, c.m) } @@ -269,7 +269,7 @@ pub fn matrix_tr_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c := alpha⋅a⋅bᵀ ⇒ cij := alpha * aik * bjk // pub fn matrix_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - vlas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, + blas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, c.m) } @@ -278,7 +278,7 @@ pub fn matrix_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c := alpha⋅aᵀ⋅bᵀ ⇒ cij := alpha * aki * bjk // pub fn matrix_tr_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - vlas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, + blas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, c.m) } @@ -287,7 +287,7 @@ pub fn matrix_tr_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅a⋅b ⇒ cij += alpha * aik * bkj // pub fn matrix_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - 
vlas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut + blas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, c.m) } @@ -296,7 +296,7 @@ pub fn matrix_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c += alpha⋅aᵀ⋅b ⇒ cij += alpha * aki * bkj // pub fn matrix_tr_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - vlas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, + blas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, c.m) } @@ -305,7 +305,7 @@ pub fn matrix_tr_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅a⋅bᵀ ⇒ cij += alpha * aik * bjk // pub fn matrix_matrix_tr_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - vlas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, + blas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, c.m) } @@ -314,7 +314,7 @@ pub fn matrix_matrix_tr_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅aᵀ⋅bᵀ ⇒ cij += alpha * aki * bjk // pub fn matrix_tr_matrix_tr_mul_add(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - vlas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, + blas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, c.m) } @@ -325,7 +325,7 @@ pub fn matrix_add(mut res Matrix[f64], alpha f64, a &Matrix[f64], beta f64, b &M cutoff := 150 if beta == 1 && n > cutoff { res.data = b.data.clone() - vlas.daxpy(n, alpha, a.data, 1, mut res.data, 1) + blas.daxpy(n, alpha, a.data, 1, mut res.data, 1) return } m := n % 4 diff --git a/la/densesol.v b/la/densesol.v index 8f8a14b21..10395a902 100644 --- a/la/densesol.v +++ b/la/densesol.v @@ -1,8 +1,8 @@ module la -import vsl.vlas +import vsl.lapack -// den_solve solves dense linear system 
using LAPACK (OpenBLaS) +// den_solve solves dense linear system using LAPACK // // Given: a ⋅ x = b find x such that x = a⁻¹ ⋅ b // @@ -16,5 +16,5 @@ pub fn den_solve(mut x []f64, a &Matrix[f64], b []f64, preserve_a bool) { x[i] = b[i] } ipiv := []int{len: a_.m} - vlas.dgesv(a_.m, 1, mut a_.data, a_.m, ipiv, mut x, 1) + lapack.dgesv(a_.m, 1, mut a_.data, a_.m, ipiv, mut x, 1) } diff --git a/la/matrix_ops.v b/la/matrix_ops.v index 9e8cfa1f8..74dc6ea28 100644 --- a/la/matrix_ops.v +++ b/la/matrix_ops.v @@ -1,7 +1,7 @@ module la import vsl.errors -import vsl.vlas +import vsl.lapack import math // det computes the determinant of matrix using the LU factorization @@ -13,7 +13,7 @@ pub fn matrix_det(o &Matrix[f64]) f64 { } mut ai := o.data.clone() ipiv := []int{len: int(math.min(o.m, o.n))} - vlas.dgetrf(o.m, o.n, mut ai, o.m, ipiv) // NOTE: ipiv are 1-based indices + lapack.dgetrf(o.m, o.n, mut ai, o.m, ipiv) // NOTE: ipiv are 1-based indices mut det := 1.0 for i in 0 .. o.m { if ipiv[i] - 1 == i { // NOTE: ipiv are 1-based indices @@ -90,7 +90,7 @@ pub fn matrix_svd(mut s []f64, mut u Matrix[f64], mut vt Matrix[f64], mut a Matr if copy_a { acpy = a.clone() } - vlas.dgesvd(&char('A'.str), &char('A'.str), a.m, a.n, acpy.data, 1, s, u.data, a.m, + lapack.dgesvd(&char('A'.str), &char('A'.str), a.m, a.n, acpy.data, 1, s, u.data, a.m, vt.data, a.n, superb) } @@ -108,7 +108,7 @@ pub fn matrix_inv(mut ai Matrix[f64], mut a Matrix[f64], calc_det bool) f64 { if a.m == a.n { ai.data = a.data.clone() ipiv := []int{len: int(math.min(a.m, a.n))} - vlas.dgetrf(a.m, a.n, mut ai.data, a.m, ipiv) // NOTE: ipiv are 1-based indices + lapack.dgetrf(a.m, a.n, mut ai.data, a.m, ipiv) // NOTE: ipiv are 1-based indices if calc_det { det = 1.0 for i := 0; i < a.m; i++ { @@ -119,7 +119,7 @@ pub fn matrix_inv(mut ai Matrix[f64], mut a Matrix[f64], calc_det bool) f64 { } } } - vlas.dgetri(a.n, mut ai.data, a.m, ipiv) + lapack.dgetri(a.n, mut ai.data, a.m, ipiv) return det } // singular value 
decomposition diff --git a/lapack/README.md b/lapack/README.md new file mode 100644 index 000000000..fccedfdfd --- /dev/null +++ b/lapack/README.md @@ -0,0 +1,58 @@ +# The V Linear Algebra Package + +This package implements Linear Algebra routines in V. + +| Backend | Description | Status | Compilation Flags | +| -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ | ------------------- | +| BLAS | Pure V implementation | Stable | `NONE` | +| LAPACKE | LAPACKE is a C interface to LAPACK. It is a standard part of the LAPACK distribution. Check the section [LAPACKE Backend](#lapacke-backend) for more information. | Stable | `-d vsl_lapack_lapacke` | + +Therefore, its routines are a little more _lower level_ than the ones in the package `vsl.la`. + +## LAPACKE Backend + +We provide a backend for the LAPACKE library. This backend is probably the fastest one for all platforms +but it requires the installation of the LAPACKE library. + +Use the compilation flag `-d vsl_lapack_lapacke` to use the LAPACKE backend +instead of the pure V implementation +and make sure that the LAPACKE library is installed in your system. + +Check the section below for more information about installing the LAPACKE library. + +
+Install dependencies + +### Homebrew (macOS) + +```sh +brew install lapack +``` + +### Debian/Ubuntu GNU Linux + +```sh +sudo apt-get install -y --no-install-recommends \ + gcc \ + gfortran \ + liblapacke-dev +``` + +### Arch Linux/Manjaro GNU Linux + +The best way of installing LAPACKE is using +[lapack-openblas](https://aur.archlinux.org/packages/lapack-openblas/). + +```sh +yay -S lapack-openblas +``` + +or + +```sh +git clone https://aur.archlinux.org/lapack-openblas.git /tmp/lapack-openblas +cd /tmp/lapack-openblas +makepkg -si +``` + +
diff --git a/vlas/cflags_notd_vsl_vlas_cblas.v b/lapack/cflags_d_vsl_lapack_lapacke copy.v similarity index 72% rename from vlas/cflags_notd_vsl_vlas_cblas.v rename to lapack/cflags_d_vsl_lapack_lapacke copy.v index a25a3ec30..081f788db 100644 --- a/vlas/cflags_notd_vsl_vlas_cblas.v +++ b/lapack/cflags_d_vsl_lapack_lapacke copy.v @@ -1,11 +1,10 @@ -module vlas +module lapack #flag linux -O2 -I/usr/local/include -I/usr/lib #flag linux -L/usr/local/lib -L/usr/lib #flag windows -O2 #flag windows -lgfortran // Intel, M1 brew, and MacPorts -#flag darwin -I/usr/local/opt/lapack/include -I/opt/homebrew/opt/lapack/include -I/opt/local/opt/lapack/include #flag darwin -L/usr/local/opt/lapack/lib -L/opt/homebrew/opt/lapack/lib -L/opt/local/opt/lapack/lib #flag -I@VMODROOT #flag -llapacke diff --git a/lapack/cflags_notd_vsl_lapack_lapacke.v b/lapack/cflags_notd_vsl_lapack_lapacke.v new file mode 100644 index 000000000..081f788db --- /dev/null +++ b/lapack/cflags_notd_vsl_lapack_lapacke.v @@ -0,0 +1,14 @@ +module lapack + +#flag linux -O2 -I/usr/local/include -I/usr/lib +#flag linux -L/usr/local/lib -L/usr/lib +#flag windows -O2 +#flag windows -lgfortran +// Intel, M1 brew, and MacPorts +#flag darwin -L/usr/local/opt/lapack/lib -L/opt/homebrew/opt/lapack/lib -L/opt/local/opt/lapack/lib +#flag -I@VMODROOT +#flag -llapacke + +$if macos { + #include +} diff --git a/lapack/lapack64/dgesv.v b/lapack/lapack64/dgesv.v new file mode 100644 index 000000000..dc8b61bdf --- /dev/null +++ b/lapack/lapack64/dgesv.v @@ -0,0 +1,56 @@ +module lapack + +import math +import vsl.blas.blas64 + +// dgesv computes the solution to a real system of linear equations +// +// A * X = B +// +// where A is an n×n matrix and X and B are n×nrhs matrices. +// +// The LU decomposition with partial pivoting and row interchanges is used to +// factor A as +// +// A = P * L * U +// +// where P is a permutation matrix, L is unit lower triangular, and U is upper +// triangular. 
On return, the factors L and U are stored in a; the unit diagonal +// elements of L are not stored. The row pivot indices that define the +// permutation matrix P are stored in ipiv. +// +// The factored form of A is then used to solve the system of equations A * X = +// B. On entry, b contains the right hand side matrix B. On return, if ok is +// true, b contains the solution matrix X. +pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { + if n < 0 { + panic(n_lt0) + } + if nrhs < 0 { + panic(nrhs_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if ldb < math.max(1, n) { + panic(bad_ld_b) + } + + // Quick return if possible. + if n == 0 || nrhs == 0 { + return + } + + if a.len < (n - 1) * lda + n { + panic(short_ab) + } + if ipiv.len < n { + panic(bad_len_ipiv) + } + if b.len < (n - 1) * ldb + nrhs { + panic(short_b) + } + + dgetrf(n, n, mut a, lda, ipiv) + dgetrs(.no_trans, n, nrhs, mut a, lda, ipiv, mut b, ldb) +} diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v new file mode 100644 index 000000000..54e383eac --- /dev/null +++ b/lapack/lapack64/dgetrf.v @@ -0,0 +1,51 @@ +module lapack + +import math +import vsl.blas.blas64 + +// dgetrf computes the LU decomposition of an m×n matrix A using partial +// pivoting with row interchanges. +// +// The LU decomposition is a factorization of A into +// +// A = P * L * U +// +// where P is a permutation matrix, L is a lower triangular with unit diagonal +// elements (lower trapezoidal if m > n), and U is upper triangular (upper +// trapezoidal if m < n). +// +// On entry, a contains the matrix A. On return, L and U are stored in place +// into a, and P is represented by ipiv. +// +// ipiv contains a sequence of row interchanges. It indicates that row i of the +// matrix was interchanged with ipiv[i]. ipiv must have length min(m,n), and +// Dgetrf will panic otherwise. ipiv is zero-indexed. +// +// Dgetrf returns whether the matrix A is nonsingular. 
The LU decomposition will +// be computed regardless of the singularity of A, but the result should not be +// used to solve a system of equation. +pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { + mn := math.min(m, n) + + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_lda) + } + + // quick return if possible + if mn == 0 { + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + if ipiv.len < mn { + panic(bad_len_ipiv) + } +} diff --git a/lapack/lapack64/errors.v b/lapack/lapack64/errors.v new file mode 100644 index 000000000..d17defc08 --- /dev/null +++ b/lapack/lapack64/errors.v @@ -0,0 +1,178 @@ +module lapack + +// This list is duplicated in netlib/lapack/netlib. Keep in sync. + +// Panic strings for bad enumeration values. +pub const bad_apply_ortho = 'lapack: bad ApplyOrtho' +pub const bad_balance_job = 'lapack: bad BalanceJob' +pub const bad_diag = 'lapack: bad Diag' +pub const bad_direct = 'lapack: bad Direct' +pub const bad_ev_comp = 'lapack: bad EVComp' +pub const bad_ev_how_many = 'lapack: bad EVHowMany' +pub const bad_ev_job = 'lapack: bad EVJob' +pub const bad_ev_side = 'lapack: bad EVSide' +pub const bad_gsvd_job = 'lapack: bad GSVDJob' +pub const bad_gen_ortho = 'lapack: bad GenOrtho' +pub const bad_left_ev_job = 'lapack: bad LeftEVJob' +pub const bad_matrix_type = 'lapack: bad MatrixType' +pub const bad_maximize_norm_x_job = 'lapack: bad MaximizeNormXJob' +pub const bad_norm = 'lapack: bad Norm' +pub const bad_ortho_comp = 'lapack: bad OrthoComp' +pub const bad_pivot = 'lapack: bad Pivot' +pub const bad_right_ev_job = 'lapack: bad RightEVJob' +pub const bad_svd_job = 'lapack: bad SVDJob' +pub const bad_schur_comp = 'lapack: bad SchurComp' +pub const bad_schur_job = 'lapack: bad SchurJob' +pub const bad_side = 'lapack: bad Side' +pub const bad_sort = 'lapack: bad Sort' +pub const bad_store_v = 'lapack: bad StoreV' +pub const bad_trans = 'lapack: bad Trans' +pub 
const bad_update_schur_comp = 'lapack: bad UpdateSchurComp' +pub const bad_uplo = 'lapack: bad Uplo' +pub const both_svd_over = 'lapack: both jobU and jobVT are lapack.SVDOverwrite' + +// Panic strings for bad numerical and string values. +pub const bad_ifst = 'lapack: ifst out of range' +pub const bad_ihi = 'lapack: ihi out of range' +pub const bad_ihiz = 'lapack: ihiz out of range' +pub const bad_ilo = 'lapack: ilo out of range' +pub const bad_iloz = 'lapack: iloz out of range' +pub const bad_ilst = 'lapack: ilst out of range' +pub const bad_isave = 'lapack: bad isave value' +pub const bad_ispec = 'lapack: bad ispec value' +pub const bad_j1 = 'lapack: j1 out of range' +pub const bad_jpvt = 'lapack: bad element of jpvt' +pub const bad_k1 = 'lapack: k1 out of range' +pub const bad_k2 = 'lapack: k2 out of range' +pub const bad_kacc22 = 'lapack: invalid value of kacc22' +pub const bad_kbot = 'lapack: kbot out of range' +pub const bad_ktop = 'lapack: ktop out of range' +pub const bad_l_work = 'lapack: insufficient declared workspace length' +pub const bad_mm = 'lapack: mm out of range' +pub const bad_n1 = 'lapack: bad value of n1' +pub const bad_n2 = 'lapack: bad value of n2' +pub const bad_na = 'lapack: bad value of na' +pub const bad_name = 'lapack: bad name' +pub const bad_nh = 'lapack: bad value of nh' +pub const bad_nw = 'lapack: bad value of nw' +pub const bad_pp = 'lapack: bad value of pp' +pub const bad_shifts = 'lapack: bad shifts' +pub const i0lt0 = 'lapack: i0 < 0' +pub const k_gtm = 'lapack: k > m' +pub const k_gtn = 'lapack: k > n' +pub const k_lt0 = 'lapack: k < 0' +pub const k_lt1 = 'lapack: k < 1' +pub const kd_lt0 = 'lapack: kd < 0' +pub const kl_lt0 = 'lapack: kl < 0' +pub const ku_lt0 = 'lapack: ku < 0' +pub const m_gtn = 'lapack: m > n' +pub const m_lt0 = 'lapack: m < 0' +pub const mm_lt0 = 'lapack: mm < 0' +pub const n0lt0 = 'lapack: n0 < 0' +pub const n_gtm = 'lapack: n > m' +pub const n_lt0 = 'lapack: n < 0' +pub const n_lt1 = 'lapack: n < 1' 
+pub const n_ltm = 'lapack: n < m' +pub const nan_c_from = 'lapack: cfrom is NaN' +pub const nan_c_to = 'lapack: cto is NaN' +pub const nb_gtm = 'lapack: nb > m' +pub const nb_gtn = 'lapack: nb > n' +pub const nb_lt0 = 'lapack: nb < 0' +pub const ncc_lt0 = 'lapack: ncc < 0' +pub const ncvt_lt0 = 'lapack: ncvt < 0' +pub const neg_a_norm = 'lapack: anorm < 0' +pub const neg_z = 'lapack: negative z value' +pub const nh_lt0 = 'lapack: nh < 0' +pub const not_isolated = 'lapack: block is not isolated' +pub const nrhs_lt0 = 'lapack: nrhs < 0' +pub const nru_lt0 = 'lapack: nru < 0' +pub const nshfts_lt0 = 'lapack: nshfts < 0' +pub const nshfts_odd = 'lapack: nshfts must be even' +pub const nv_lt0 = 'lapack: nv < 0' +pub const offset_gtm = 'lapack: offset > m' +pub const offset_lt0 = 'lapack: offset < 0' +pub const p_lt0 = 'lapack: p < 0' +pub const recur_lt0 = 'lapack: recur < 0' +pub const zero_c_from = 'lapack: zero cfrom' + +// Panic strings for bad slice lengths. +pub const bad_len_alpha = 'lapack: bad length of alpha' +pub const bad_len_beta = 'lapack: bad length of beta' +pub const bad_len_ipiv = 'lapack: bad length of ipiv' +pub const bad_len_jpiv = 'lapack: bad length of jpiv' +pub const bad_len_jpvt = 'lapack: bad length of jpvt' +pub const bad_len_k = 'lapack: bad length of k' +pub const bad_len_piv = 'lapack: bad length of piv' +pub const bad_len_selected = 'lapack: bad length of selected' +pub const bad_len_si = 'lapack: bad length of si' +pub const bad_len_sr = 'lapack: bad length of sr' +pub const bad_len_tau = 'lapack: bad length of tau' +pub const bad_len_wi = 'lapack: bad length of wi' +pub const bad_len_wr = 'lapack: bad length of wr' + +// Panic strings for insufficient slice lengths. 
+pub const short_a = 'lapack: insufficient length of a' +pub const short_ab = 'lapack: insufficient length of ab' +pub const short_auxv = 'lapack: insufficient length of auxv' +pub const short_b = 'lapack: insufficient length of b' +pub const short_c = 'lapack: insufficient length of c' +pub const short_c_norm = 'lapack: insufficient length of cnorm' +pub const short_d = 'lapack: insufficient length of d' +pub const short_dl = 'lapack: insufficient length of dl' +pub const short_du = 'lapack: insufficient length of du' +pub const short_e = 'lapack: insufficient length of e' +pub const short_f = 'lapack: insufficient length of f' +pub const short_h = 'lapack: insufficient length of h' +pub const short_i_work = 'lapack: insufficient length of iwork' +pub const short_isgn = 'lapack: insufficient length of isgn' +pub const short_q = 'lapack: insufficient length of q' +pub const short_rhs = 'lapack: insufficient length of rhs' +pub const short_s = 'lapack: insufficient length of s' +pub const short_scale = 'lapack: insufficient length of scale' +pub const short_t = 'lapack: insufficient length of t' +pub const short_tau = 'lapack: insufficient length of tau' +pub const short_tau_p = 'lapack: insufficient length of tauP' +pub const short_tau_q = 'lapack: insufficient length of tauQ' +pub const short_u = 'lapack: insufficient length of u' +pub const short_v = 'lapack: insufficient length of v' +pub const short_vl = 'lapack: insufficient length of vl' +pub const short_vr = 'lapack: insufficient length of vr' +pub const short_vt = 'lapack: insufficient length of vt' +pub const short_vn1 = 'lapack: insufficient length of vn1' +pub const short_vn2 = 'lapack: insufficient length of vn2' +pub const short_w = 'lapack: insufficient length of w' +pub const short_wh = 'lapack: insufficient length of wh' +pub const short_wv = 'lapack: insufficient length of wv' +pub const short_wi = 'lapack: insufficient length of wi' +pub const short_work = 'lapack: insufficient length of work' 
+pub const short_wr = 'lapack: insufficient length of wr' +pub const short_x = 'lapack: insufficient length of x' +pub const short_y = 'lapack: insufficient length of y' +pub const short_z = 'lapack: insufficient length of z' + +// Panic strings for bad leading dimensions of matrices. +pub const bad_ld_a = 'lapack: bad leading dimension of A' +pub const bad_ld_b = 'lapack: bad leading dimension of B' +pub const bad_ld_c = 'lapack: bad leading dimension of C' +pub const bad_ld_f = 'lapack: bad leading dimension of F' +pub const bad_ld_h = 'lapack: bad leading dimension of H' +pub const bad_ld_q = 'lapack: bad leading dimension of Q' +pub const bad_ld_t = 'lapack: bad leading dimension of T' +pub const bad_ld_u = 'lapack: bad leading dimension of U' +pub const bad_ld_v = 'lapack: bad leading dimension of V' +pub const bad_ld_vl = 'lapack: bad leading dimension of VL' +pub const bad_ld_vr = 'lapack: bad leading dimension of VR' +pub const bad_ld_vt = 'lapack: bad leading dimension of VT' +pub const bad_ld_w = 'lapack: bad leading dimension of W' +pub const bad_ld_wh = 'lapack: bad leading dimension of WH' +pub const bad_ld_wv = 'lapack: bad leading dimension of WV' +pub const bad_ld_work = 'lapack: bad leading dimension of Work' +pub const bad_ld_x = 'lapack: bad leading dimension of X' +pub const bad_ld_y = 'lapack: bad leading dimension of Y' +pub const bad_ld_z = 'lapack: bad leading dimension of Z' + +// Panic strings for bad vector increments. 
+pub const abs_inc_not_one = 'lapack: increment not one or negative one' +pub const bad_inc_x = 'lapack: incX <= 0' +pub const bad_inc_y = 'lapack: incY <= 0' +pub const zero_inc_v = 'lapack: incv == 0' diff --git a/vlas/lapack_common.v b/lapack/lapack_common.v similarity index 79% rename from vlas/lapack_common.v rename to lapack/lapack_common.v index 8a30e8f5e..14d291ee0 100644 --- a/vlas/lapack_common.v +++ b/lapack/lapack_common.v @@ -1,25 +1,26 @@ -module vlas +module lapack import vsl.errors -import vsl.vlas.internal.blas +import vsl.blas +import vsl.blas.blas64 -fn C.LAPACKE_dgesv(matrix_layout blas.MemoryLayout, n int, nrhs int, a &f64, lda int, ipiv &int, b &f64, ldb int) int +fn C.LAPACKE_dgesv(matrix_layout blas64.MemoryLayout, n int, nrhs int, a &f64, lda int, ipiv &int, b &f64, ldb int) int -fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int +fn C.LAPACKE_dgesvd(matrix_layout blas64.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int -fn C.LAPACKE_dgetrf(matrix_layout blas.MemoryLayout, m int, n int, a &f64, lda int, ipiv &int) int +fn C.LAPACKE_dgetrf(matrix_layout blas64.MemoryLayout, m int, n int, a &f64, lda int, ipiv &int) int -fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipiv &int) int +fn C.LAPACKE_dgetri(matrix_layout blas64.MemoryLayout, n int, a &f64, lda int, ipiv &int) int -fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, up u32, n int, a &f64, lda int) int +fn C.LAPACKE_dpotrf(matrix_layout blas64.MemoryLayout, up u32, n int, a &f64, lda int) int -fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int +fn C.LAPACKE_dgeev(matrix_layout blas64.MemoryLayout, calc_vl 
&char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int -fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int +fn C.LAPACKE_dsyev(matrix_layout blas64.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int -fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int +fn C.LAPACKE_dgebal(matrix_layout blas64.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int -fn C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int +fn C.LAPACKE_dgehrd(matrix_layout blas64.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int // dgesv computes the solution to a real system of linear equations. // @@ -143,7 +144,7 @@ pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { // This is the block version of the algorithm, calling Level 3 BLAS. 
pub fn dpotrf(up bool, n int, mut a []f64, lda int) { unsafe { - info := C.LAPACKE_dpotrf(.row_major, l_uplo(up), n, &a[0], lda) + info := C.LAPACKE_dpotrf(.row_major, blas.l_uplo(up), n, &a[0], lda) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } @@ -189,7 +190,7 @@ pub fn dgeev(calc_vl bool, calc_vr bool, n int, mut a []f64, lda int, wr []f64, ldvr = 1 } unsafe { - info := C.LAPACKE_dgeev(.row_major, &char(job_vlr(calc_vl).str().str), &char(job_vlr(calc_vr).str().str), + info := C.LAPACKE_dgeev(.row_major, &char(blas.job_vlr(calc_vl).str().str), &char(blas.job_vlr(calc_vr).str().str), n, &a[0], lda, &wr[0], &wi[0], &vvl, ldvl, &vvr, ldvr) if info != 0 { errors.vsl_panic('lapack failed', .efailed) diff --git a/vlas/lapack_default.c.v b/lapack/lapack_default.c.v similarity index 54% rename from vlas/lapack_default.c.v rename to lapack/lapack_default.c.v index deab5058f..a16459354 100644 --- a/vlas/lapack_default.c.v +++ b/lapack/lapack_default.c.v @@ -1,8 +1,8 @@ -module vlas +module lapack -import vsl.vlas.internal.blas +import vsl.blas.blas64 -fn C.LAPACKE_dlange(matrix_layout blas.MemoryLayout, norm &char, m int, n int, a &f64, lda int, work &f64) f64 +fn C.LAPACKE_dlange(matrix_layout blas64.MemoryLayout, norm &char, m int, n int, a &f64, lda int, work &f64) f64 pub fn dlange(norm rune, m int, n int, a []f64, lda int, work []f64) f64 { return unsafe { diff --git a/vlas/lapack_macos.c.v b/lapack/lapack_macos.c.v similarity index 94% rename from vlas/lapack_macos.c.v rename to lapack/lapack_macos.c.v index 2c0cbc7c3..b12ecacf9 100644 --- a/vlas/lapack_macos.c.v +++ b/lapack/lapack_macos.c.v @@ -1,4 +1,4 @@ -module vlas +module lapack fn C.LAPACKE_dlange(norm &char, m int, n int, a &f64, lda int, work &f64) f64 diff --git a/vlas/v.mod b/lapack/v.mod similarity index 63% rename from vlas/v.mod rename to lapack/v.mod index e036330dd..f4dca2f9d 100644 --- a/vlas/v.mod +++ b/lapack/v.mod @@ -1,6 +1,6 @@ Module { - name: 'vlas' - description: 'The V Linear 
Algebra System' + name: 'lapack' + description: 'The V Linear Algebra Package' version: '0.1.0' license: 'MIT' repo_url: 'https://github.com/vlang/vsl' diff --git a/vlas/README.md b/vlas/README.md deleted file mode 100644 index 1677eb1c8..000000000 --- a/vlas/README.md +++ /dev/null @@ -1,83 +0,0 @@ -# V Linear Algebra System - -This package implements BLAS and LAPACKE functions. It provides different backends: - -| Backend | Description | Status | Compilation Flags | -| -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------- | --------------------- | -| VLAS | Pure V implementation | WIP | `NONE` | -| OpenBLAS | OpenBLAS is an optimized BLAS library based on . Check the section [OpenBLAS Backend](#openblas-backend) for more information. | Working | `-d vsl_vlas_cblas` | -| LAPACKE | LAPACKE is a C interface to the LAPACK linear algebra routines | Working | `-d vsl_vlas_lapacke` | - -Therefore, its routines are a little more _lower level_ than the ones in the package `vsl.la`. - -## OpenBLAS Backend - -Use the flag `-d vsl_vlas_cblas` to use the OpenBLAS backend. - -### Install dependencies - -#### Debian/Ubuntu GNU Linux - -`libopenblas-dev` is not needed when using the pure V backend. - -```sh -sudo apt-get install -y --no-install-recommends \ - gcc \ - gfortran \ - libopenblas-dev -``` - -#### Arch Linux/Manjaro GNU Linux - -The best way of installing OpenBlas is using -[lapack-openblas](https://aur.archlinux.org/packages/lapack-openblas/). - -```sh -yay -S lapack-openblas -``` - -or - -```sh -git clone https://aur.archlinux.org/lapack-openblas.git /tmp/lapack-openblas -cd /tmp/lapack-openblas -makepkg -si -``` - -#### macOS - -```sh -brew install openblas -``` - -## LAPACKE Backend - -Use the flag `-d vsl_vlas_lapacke` to use the LAPACKE backend (enabled by default for now). 
- -### Install dependencies - -#### Debian/Ubuntu GNU Linux - -```sh -sudo apt-get install -y --no-install-recommends \ - gcc \ - gfortran \ - liblapacke-dev -``` - -#### Arch Linux/Manjaro GNU Linux - -The best way of installing LAPACKE is using -[lapack-openblas](https://aur.archlinux.org/packages/lapack-openblas/). - -```sh -yay -S lapack-openblas -``` - -or - -```sh -git clone https://aur.archlinux.org/lapack-openblas.git /tmp/lapack-openblas -cd /tmp/lapack-openblas -makepkg -si -``` diff --git a/vlas/oblas_d_vsl_vlas_cblas.v b/vlas/oblas_d_vsl_vlas_cblas.v deleted file mode 100644 index eb2d72b1d..000000000 --- a/vlas/oblas_d_vsl_vlas_cblas.v +++ /dev/null @@ -1,448 +0,0 @@ -module vlas - -import vsl.vlas.internal.blas - -fn C.openblas_set_num_threads(n int) - -fn C.cblas_sdsdot(n int, alpha f32, x &f32, incx int, y &f32, incy int) f32 -fn C.cblas_dsdot(n int, x &f32, incx int, y &f32, incy int) f64 -fn C.cblas_sdot(n int, x &f32, incx int, y &f32, incy int) f32 -fn C.cblas_ddot(n int, x &f64, incx int, y &f64, incy int) f64 -fn C.cblas_cdotu(n int, x voidptr, incx int, y voidptr, incy int) f32 -fn C.cblas_cdotc(n int, x voidptr, incx int, y voidptr, incy int) f32 -fn C.cblas_zdotu(n int, x voidptr, incx int, y voidptr, incy int) f64 -fn C.cblas_zdotc(n int, x voidptr, incx int, y voidptr, incy int) f64 -fn C.cblas_cdotu_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) -fn C.cblas_cdotc_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) -fn C.cblas_zdotu_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) -fn C.cblas_zdotc_sub(n int, x voidptr, incx int, y voidptr, incy int, ret voidptr) -fn C.cblas_sasum(n int, x &f32, incx int) f32 -fn C.cblas_dasum(n int, x &f64, incx int) f64 -fn C.cblas_scasum(n int, x voidptr, incx int) f32 -fn C.cblas_dzasum(n int, x voidptr, incx int) f64 -fn C.cblas_ssum(n int, x &f32, incx int) f32 -fn C.cblas_dsum(n int, x &f64, incx int) f64 -fn C.cblas_scsum(n int, x voidptr, incx 
int) f32 -fn C.cblas_dzsum(n int, x voidptr, incx int) f64 -fn C.cblas_snrm2(n int, x &f32, incx int) f32 -fn C.cblas_dnrm2(n int, x &f64, incx int) f64 -fn C.cblas_scnrm2(n int, x voidptr, incx int) f32 -fn C.cblas_dznrm2(n int, x voidptr, incx int) f64 - -fn C.cblas_isamax(n int, x &f32, incx int) int -fn C.cblas_idamax(n int, x &f64, incx int) int -fn C.cblas_icamax(n int, x voidptr, incx int) int -fn C.cblas_izamax(n int, x voidptr, incx int) int -fn C.cblas_isamin(n int, x &f32, incx int) int -fn C.cblas_idamin(n int, x &f64, incx int) int -fn C.cblas_icamin(n int, x voidptr, incx int) int -fn C.cblas_izamin(n int, x voidptr, incx int) int -fn C.cblas_ismax(n int, x &f32, incx int) int -fn C.cblas_idmax(n int, x &f64, incx int) int -fn C.cblas_icmax(n int, x voidptr, incx int) int -fn C.cblas_izmax(n int, x voidptr, incx int) int -fn C.cblas_ismin(n int, x &f32, incx int) int -fn C.cblas_idmin(n int, x &f64, incx int) int -fn C.cblas_icmin(n int, x voidptr, incx int) int -fn C.cblas_izmin(n int, x voidptr, incx int) int -fn C.cblas_saxpy(n int, alpha f32, x &f32, incx int, y &f32, incy int) -fn C.cblas_daxpy(n int, alpha f64, x &f64, incx int, y &f64, incy int) -fn C.cblas_caxpy(n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_zaxpy(n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_scopy(n int, x &f32, incx int, y &f32, incy int) -fn C.cblas_dcopy(n int, x &f64, incx int, y &f64, incy int) -fn C.cblas_ccopy(n int, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_zcopy(n int, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_sswap(n int, x &f32, incx int, y &f32, incy int) -fn C.cblas_dswap(n int, x &f64, incx int, y &f64, incy int) -fn C.cblas_cswap(n int, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_zswap(n int, x voidptr, incx int, y voidptr, incy int) -fn C.cblas_srot(n int, x &f32, incx int, y &f32, incy int, c f32, s f32) -fn C.cblas_drot(n int, x &f64, incx int, y &f64, incy int, c 
f64, s f64) -fn C.cblas_srotg(a &f32, b &f32, c &f32, s &f32) -fn C.cblas_drotg(a &f64, b &f64, c &f64, s &f64) -fn C.cblas_srotm(n int, x &f32, incx int, y &f32, incy int, p &f32) -fn C.cblas_drotm(n int, x &f64, incx int, y &f64, incy int, p &f64) -fn C.cblas_srotmg(d1 &f32, d2 &f32, b1 &f32, b2 f32, p &f32) -fn C.cblas_drotmg(d1 &f64, d2 &f64, b1 &f64, b2 f64, p &f64) -fn C.cblas_sscal(n int, alpha f32, x &f32, incx int) -fn C.cblas_dscal(n int, alpha f64, x &f64, incx int) -fn C.cblas_cscal(n int, alpha voidptr, x voidptr, incx int) -fn C.cblas_zscal(n int, alpha voidptr, x voidptr, incx int) -fn C.cblas_csscal(n int, alpha f32, x voidptr, incx int) -fn C.cblas_zdscal(n int, alpha f64, x voidptr, incx int) -fn C.cblas_sgemv(order blas.MemoryLayout, trans blas.Transpose, m int, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dgemv(order blas.MemoryLayout, trans blas.Transpose, m int, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_cgemv(order blas.MemoryLayout, trans blas.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zgemv(order blas.MemoryLayout, trans blas.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sger(order blas.MemoryLayout, m int, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) -fn C.cblas_dger(order blas.MemoryLayout, m int, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) -fn C.cblas_cgeru(order blas.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_cgerc(order blas.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_zgeru(order blas.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, 
lda int) -fn C.cblas_zgerc(order blas.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_strsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtrsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctrsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztrsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_strmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtrmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctrmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztrmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ssyr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x &f32, incx int, a &f32, lda int) -fn C.cblas_dsyr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x &f64, incx int, a &f64, lda int) -fn C.cblas_cher(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr, lda int) -fn C.cblas_zher(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr, lda int) -fn C.cblas_ssyr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) -fn C.cblas_dsyr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x &f64, incx int, 
y &f64, incy int, a &f64, lda int) -fn C.cblas_cher2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_zher2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_sgbmv(order blas.MemoryLayout, transA blas.Transpose, m int, n int, kl int, ku int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dgbmv(order blas.MemoryLayout, transA blas.Transpose, m int, n int, kl int, ku int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_cgbmv(order blas.MemoryLayout, transA blas.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zgbmv(order blas.MemoryLayout, transA blas.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_ssbmv(order blas.MemoryLayout, uplo blas.Uplo, n int, k int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dsbmv(order blas.MemoryLayout, uplo blas.Uplo, n int, k int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_stbmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtbmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctbmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztbmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_stbsv(order blas.MemoryLayout, uplo 
blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtbsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctbsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztbsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_stpmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap &f32, x &f32, incx int) -fn C.cblas_dtpmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap &f64, x &f64, incx int) -fn C.cblas_ctpmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ztpmv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_stpsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap &f32, x &f32, incx int) -fn C.cblas_dtpsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap &f64, x &f64, incx int) -fn C.cblas_ctpsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ztpsv(order blas.MemoryLayout, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ssymv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dsymv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_chemv(order 
blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhemv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sspmv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, ap &f32, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dspmv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, ap &f64, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_sspr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x &f32, incx int, ap &f32) -fn C.cblas_dspr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x &f64, incx int, ap &f64) -fn C.cblas_chpr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr) -fn C.cblas_zhpr(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr) -fn C.cblas_sspr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32) -fn C.cblas_dspr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64) -fn C.cblas_chpr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) -fn C.cblas_zhpr2(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) -fn C.cblas_chbmv(order blas.MemoryLayout, uplo blas.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhbmv(order blas.MemoryLayout, uplo blas.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_chpmv(order blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhpmv(order 
blas.MemoryLayout, uplo blas.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sgemm(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) -fn C.cblas_dgemm(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_cgemm(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_cgemm3m(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zgemm(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zgemm3m(order blas.MemoryLayout, transA blas.Transpose, transB blas.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssymm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) -fn C.cblas_dsymm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_csymm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsymm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssyrk(order 
blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f32, a &f32, lda int, beta f32, c &f32, ldc int) -fn C.cblas_dsyrk(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f64, a &f64, lda int, beta f64, c &f64, ldc int) -fn C.cblas_csyrk(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsyrk(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssyr2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) -fn C.cblas_dsyr2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_csyr2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsyr2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_strmm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) -fn C.cblas_dtrmm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) -fn C.cblas_ctrmm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_ztrmm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha voidptr, a 
voidptr, lda int, B voidptr, ldb int) -fn C.cblas_strsm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) -fn C.cblas_dtrsm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) -fn C.cblas_ctrsm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_ztrsm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_chemm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zhemm(order blas.MemoryLayout, side blas.Side, uplo blas.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_cherk(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f32, a voidptr, lda int, beta f32, c voidptr, ldc int) -fn C.cblas_zherk(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha f64, a voidptr, lda int, beta f64, c voidptr, ldc int) -fn C.cblas_cher2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f32, c voidptr, ldc int) -fn C.cblas_zher2k(order blas.MemoryLayout, uplo blas.Uplo, trans blas.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f64, c voidptr, ldc int) -fn C.cblas_xerbla(p int, rout &byte, form &byte, other voidptr) - -fn C.cblas_saxpby(n int, alpha f32, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_daxpby(n int, alpha f64, x &f64, incx 
int, beta f64, y &f64, incy int) -fn C.cblas_caxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zaxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_somatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, b &f32, cldb int) -fn C.cblas_domatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha f64, a &f64, clda int, b &f64, cldb int) -fn C.cblas_comatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, b &f32, cldb int) -fn C.cblas_zomatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, b &f64, cldb int) -fn C.cblas_simatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, cldb int) -fn C.cblas_dimatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha f64, a &f64, clda int, cldb int) -fn C.cblas_cimatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, cldb int) -fn C.cblas_zimatcopy(corder blas.MemoryLayout, ctrans blas.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, cldb int) -fn C.cblas_sgeadd(corder blas.MemoryLayout, crows int, ccols int, calpha f32, a &f32, clda int, cbeta f32, c &f32, cldc int) -fn C.cblas_dgeadd(corder blas.MemoryLayout, crows int, ccols int, calpha f64, a &f64, clda int, cbeta f64, c &f64, cldc int) -fn C.cblas_cgeadd(corder blas.MemoryLayout, crows int, ccols int, calpha &f32, a &f32, clda int, cbeta &f32, c &f32, cldc int) -fn C.cblas_zgeadd(corder blas.MemoryLayout, crows int, ccols int, calpha &f64, a &f64, clda int, cbeta &f64, c &f64, cldc int) - -// set_num_threads sets the number of threads in OpenBLAS -pub fn set_num_threads(n int) { - C.openblas_set_num_threads(n) -} - -@[inline] -pub fn sdsdot(n int, 
alpha f32, x []f32, incx int, y []f32, incy int) f32 { - return C.cblas_sdsdot(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn dsdot(n int, x []f32, incx int, y []f32, incy int) f64 { - return C.cblas_dsdot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn sdot(n int, x []f32, incx int, y []f32, incy int) f32 { - return C.cblas_sdot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn ddot(n int, x []f64, incx int, y []f64, incy int) f64 { - return C.cblas_ddot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn sasum(n int, x []f32, incx int) f32 { - return C.cblas_sasum(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dasum(n int, x []f64, incx int) f64 { - return C.cblas_dasum(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn ssum(n int, x []f32, incx int) f32 { - return C.cblas_ssum(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dsum(n int, x []f64, incx int) f64 { - return C.cblas_dsum(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn snrm2(n int, x []f32, incx int) f32 { - return C.cblas_snrm2(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dnrm2(n int, x []f64, incx int) f64 { - return C.cblas_dnrm2(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn isamax(n int, x []f32, incx int) int { - return C.cblas_isamax(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn idamax(n int, x []f64, incx int) int { - return C.cblas_idamax(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn isamin(n int, x []f32, incx int) int { - return C.cblas_isamin(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn idamin(n int, x &f64, incx int) int { - return C.cblas_idamin(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn ismax(n int, x []f32, incx int) int { - return C.cblas_ismax(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn idmax(n int, x []f64, incx int) int { - return C.cblas_idmax(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn ismin(n int, x []f32, incx 
int) int { - return C.cblas_ismin(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn idmin(n int, x []f64, incx int) int { - return C.cblas_idmin(n, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn saxpy(n int, alpha f32, x []f32, incx int, mut y []f32, incy int) { - C.cblas_saxpy(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn daxpy(n int, alpha f64, x []f64, incx int, mut y []f64, incy int) { - C.cblas_daxpy(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn scopy(n int, mut x []f32, incx int, mut y []f32, incy int) { - C.cblas_scopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn dcopy(n int, mut x []f64, incx int, mut y []f64, incy int) { - C.cblas_dcopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn sswap(n int, mut x []f32, incx int, mut y []f32, incy int) { - C.cblas_sswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn dswap(n int, mut x []f64, incx int, mut y []f64, incy int) { - C.cblas_dswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn srot(n int, mut x []f32, incx int, mut y []f32, incy int, c f32, s f32) { - C.cblas_srot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, c, s) -} - -@[inline] -pub fn drot(n int, mut x []f64, incx int, mut y []f64, incy int, c f64, s f64) { - C.cblas_drot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, c, s) -} - -@[inline] -pub fn srotg(a f32, b f32, c f32, s f32) { - C.cblas_srotg(&a, &b, &c, &s) -} - -@[inline] -pub fn drotg(a f64, b f64, c f64, s f64) { - C.cblas_drotg(&a, &b, &c, &s) -} - -@[inline] -pub fn srotm(n int, x []f32, incx int, y []f32, incy int, p []f32) { - C.cblas_srotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) -} - -@[inline] -pub fn drotm(n int, x []f64, incx int, y []f64, incy int, p []f64) { - C.cblas_drotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) -} - -@[inline] -pub 
fn srotmg(d1 f32, d2 f32, b1 f32, b2 f32, p []f32) { - C.cblas_srotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) -} - -@[inline] -pub fn drotmg(d1 f64, d2 f64, b1 f64, b2 f32, p []f64) { - C.cblas_drotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) -} - -@[inline] -pub fn sscal(n int, alpha f32, mut x []f32, incx int) { - C.cblas_sscal(n, alpha, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dscal(n int, alpha f64, mut x []f64, incx int) { - C.cblas_dscal(n, alpha, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn sgemv(trans bool, m int, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_sgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, - incx, beta, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, - incx, beta, unsafe { &y[0] }, incy) -} - -@[inline] -pub fn sger(m int, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { - C.cblas_sger(.row_major, m, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, - unsafe { &a[0] }, lda) -} - -@[inline] -pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - C.cblas_dger(.row_major, m, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, - unsafe { &a[0] }, lda) -} - -@[inline] -pub fn strsv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn 
strmv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag blas.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) -} - -@[inline] -pub fn ssyr(uplo bool, n int, alpha f32, x []f32, incx int, mut a []f32, lda int) { - C.cblas_ssyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, - lda) -} - -@[inline] -pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { - C.cblas_dsyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, - lda) -} - -@[inline] -pub fn ssyr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { - C.cblas_ssyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, - incy, unsafe { &a[0] }, lda) -} - -@[inline] -pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - C.cblas_dsyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, - incy, unsafe { &a[0] }, lda) -} - -@[inline] -pub fn sgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut cc []f32, ldc int) { - C.cblas_sgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) -} - -@[inline] -pub fn dgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { - C.cblas_dgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) -} 
From d667c95c858bd01bfec2def6f2311045c205e1fe Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 02:15:22 -0300 Subject: [PATCH 02/33] Update installation instructions for LAPACK-OpenBLAS --- blas/README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/blas/README.md b/blas/README.md index ac8c441d8..a21bdf992 100644 --- a/blas/README.md +++ b/blas/README.md @@ -57,10 +57,4 @@ cd /tmp/lapack-openblas makepkg -si ``` -### macOS - -```sh -brew install openblas -``` - From 10eea19687af64e1c8fb7f5d6eaf6cc4fafe9304 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 02:37:19 -0300 Subject: [PATCH 03/33] Update imports and function signatures in lapack and blas modules --- blas/blas64/conversions.v | 5 + blas/conversions.v | 23 +++ blas/oblas_d_vsl_blas_cblas.v | 230 +++++++++++++++---------------- blas/oblas_notd_vsl_blas_cblas.v | 4 +- lapack/lapack_common.v | 19 ++- lapack/lapack_default.c.v | 4 +- 6 files changed, 153 insertions(+), 132 deletions(-) diff --git a/blas/blas64/conversions.v b/blas/blas64/conversions.v index 591e7a2e8..6688d5887 100644 --- a/blas/blas64/conversions.v +++ b/blas/blas64/conversions.v @@ -1,10 +1,12 @@ module blas64 +// MemoryLayout is used to specify the memory layout of a matrix. pub enum MemoryLayout { row_major = 101 col_major = 102 } +// Transpose is used to specify the transposition of a matrix. pub enum Transpose { no_trans = 111 trans = 112 @@ -12,16 +14,19 @@ pub enum Transpose { conj_no_trans = 114 } +// Uplo is used to specify whether the upper or lower triangle of a matrix is referenced. pub enum Uplo { upper = 121 lower = 122 } +// Diagonal is used to specify whether the diagonal of a matrix is unit or non-unit. pub enum Diagonal { non_unit = 131 unit = 132 } +// Side is used to specify whether a matrix is on the left or right side in a matrix-matrix multiplication. 
pub enum Side { left = 141 right = 142 diff --git a/blas/conversions.v b/blas/conversions.v index 154edade5..e917e4f58 100644 --- a/blas/conversions.v +++ b/blas/conversions.v @@ -6,18 +6,37 @@ import math.complex import vsl.errors import vsl.blas.blas64 -pub fn c_trans(trans bool) blas64.Transpose { +// MemoryLayout is used to specify the memory layout of a matrix. +pub type MemoryLayout = blas64.MemoryLayout + +// Transpose is used to specify the transposition of a matrix. +pub type Transpose = blas64.Transpose + +// Uplo is used to specify whether the upper or lower triangle of a matrix is referenced. +pub type Uplo = blas64.Uplo + +// Diagonal is used to specify whether the diagonal of a matrix is unit or non-unit. +pub type Diagonal = blas64.Diagonal + +// Side is used to specify whether a matrix is on the left or right side in a matrix-matrix multiplication. +pub type Side = blas64.Side + +// c_trans is a helper function to convert bool to Transpose +pub fn c_trans(trans bool) Transpose { return if trans { .trans } else { .no_trans } } -pub fn c_uplo(up bool) blas64.Uplo { +// c_uplo is a helper function to convert bool to Uplo +pub fn c_uplo(up bool) Uplo { return if up { .upper } else { .lower } } +// l_uplo is a helper function to convert bool to the LAPACK uplo character code `U` or `L` (as u8) fn l_uplo(up bool) u8 { return if up { `U` } else { `L` } } +// job_vlr is a helper function to convert bool to the LAPACK job rune `V` or `N` fn job_vlr(do_calc bool) rune { return if do_calc { `V` } else { `N` } } diff --git a/blas/oblas_d_vsl_blas_cblas.v b/blas/oblas_d_vsl_blas_cblas.v index b4483c995..29be3be06 100644 --- a/blas/oblas_d_vsl_blas_cblas.v +++ b/blas/oblas_d_vsl_blas_cblas.v @@ -1,7 +1,5 @@ module blas -import vsl.blas64.blas64 - fn C.openblas_set_num_threads(n int) fn C.cblas_sdsdot(n int, alpha f32, x &f32, incx int, y &f32, incy int) f32 @@ -71,122 +69,122 @@ fn C.cblas_cscal(n int, alpha voidptr, x voidptr, incx int) fn C.cblas_zscal(n int, alpha voidptr, x voidptr, incx int) fn C.cblas_csscal(n int, alpha f32, x voidptr, 
incx int) fn C.cblas_zdscal(n int, alpha f64, x voidptr, incx int) -fn C.cblas_sgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_cgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zgemv(order blas64.MemoryLayout, trans blas64.Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sger(order blas64.MemoryLayout, m int, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) -fn C.cblas_dger(order blas64.MemoryLayout, m int, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) -fn C.cblas_cgeru(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_cgerc(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_zgeru(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_zgerc(order blas64.MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_strsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn 
C.cblas_ztrsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_strmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztrmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ssyr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, a &f32, lda int) -fn C.cblas_dsyr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, a &f64, lda int) -fn C.cblas_cher(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr, lda int) -fn C.cblas_zher(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr, lda int) -fn C.cblas_ssyr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) -fn C.cblas_dsyr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) -fn C.cblas_cher2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_zher2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) -fn C.cblas_sgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn 
C.cblas_dgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_cgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zgbmv(order blas64.MemoryLayout, transA blas64.Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_ssbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dsbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_stbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_ztbmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_stbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) -fn C.cblas_dtbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) -fn C.cblas_ctbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn 
C.cblas_ztbsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) -fn C.cblas_stpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f32, x &f32, incx int) -fn C.cblas_dtpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f64, x &f64, incx int) -fn C.cblas_ctpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ztpmv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_stpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f32, x &f32, incx int) -fn C.cblas_dtpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap &f64, x &f64, incx int) -fn C.cblas_ctpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ztpsv(order blas64.MemoryLayout, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, n int, ap voidptr, x voidptr, incx int) -fn C.cblas_ssymv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dsymv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_chemv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhemv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sspmv(order blas64.MemoryLayout, uplo 
blas64.Uplo, n int, alpha f32, ap &f32, x &f32, incx int, beta f32, y &f32, incy int) -fn C.cblas_dspmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, ap &f64, x &f64, incx int, beta f64, y &f64, incy int) -fn C.cblas_sspr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, ap &f32) -fn C.cblas_dspr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, ap &f64) -fn C.cblas_chpr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x voidptr, incx int, a voidptr) -fn C.cblas_zhpr(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x voidptr, incx int, a voidptr) -fn C.cblas_sspr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32) -fn C.cblas_dspr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64) -fn C.cblas_chpr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) -fn C.cblas_zhpr2(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) -fn C.cblas_chbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhbmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_chpmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_zhpmv(order blas64.MemoryLayout, uplo blas64.Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_sgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta 
f32, c &f32, ldc int) -fn C.cblas_dgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_cgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_cgemm3m(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zgemm(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zgemm3m(order blas64.MemoryLayout, transA blas64.Transpose, transB blas64.Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) -fn C.cblas_dsymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_csymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsymm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a &f32, lda int, beta f32, c &f32, ldc int) -fn C.cblas_dsyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha 
f64, a &f64, lda int, beta f64, c &f64, ldc int) -fn C.cblas_csyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsyrk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) -fn C.cblas_ssyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) -fn C.cblas_dsyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) -fn C.cblas_csyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zsyr2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_strmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) -fn C.cblas_dtrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) -fn C.cblas_ctrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_ztrmm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_strsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag 
blas64.Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) -fn C.cblas_dtrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) -fn C.cblas_ctrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_ztrsm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, transA blas64.Transpose, diag blas64.Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) -fn C.cblas_chemm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_zhemm(order blas64.MemoryLayout, side blas64.Side, uplo blas64.Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) -fn C.cblas_cherk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f32, a voidptr, lda int, beta f32, c voidptr, ldc int) -fn C.cblas_zherk(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha f64, a voidptr, lda int, beta f64, c voidptr, ldc int) -fn C.cblas_cher2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f32, c voidptr, ldc int) -fn C.cblas_zher2k(order blas64.MemoryLayout, uplo blas64.Uplo, trans blas64.Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f64, c voidptr, ldc int) +fn C.cblas_sgemv(order MemoryLayout, trans Transpose, m int, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dgemv(order MemoryLayout, trans Transpose, m int, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, 
incy int) +fn C.cblas_cgemv(order MemoryLayout, trans Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zgemv(order MemoryLayout, trans Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sger(order MemoryLayout, m int, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) +fn C.cblas_dger(order MemoryLayout, m int, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) +fn C.cblas_cgeru(order MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_cgerc(order MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zgeru(order MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zgerc(order MemoryLayout, m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_strsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtrsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctrsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztrsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_strmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtrmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctrmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztrmv(order MemoryLayout, 
uplo Uplo, transA Transpose, diag Diagonal, n int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ssyr(order MemoryLayout, uplo Uplo, n int, alpha f32, x &f32, incx int, a &f32, lda int) +fn C.cblas_dsyr(order MemoryLayout, uplo Uplo, n int, alpha f64, x &f64, incx int, a &f64, lda int) +fn C.cblas_cher(order MemoryLayout, uplo Uplo, n int, alpha f32, x voidptr, incx int, a voidptr, lda int) +fn C.cblas_zher(order MemoryLayout, uplo Uplo, n int, alpha f64, x voidptr, incx int, a voidptr, lda int) +fn C.cblas_ssyr2(order MemoryLayout, uplo Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32, lda int) +fn C.cblas_dsyr2(order MemoryLayout, uplo Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64, lda int) +fn C.cblas_cher2(order MemoryLayout, uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_zher2(order MemoryLayout, uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, a voidptr, lda int) +fn C.cblas_sgbmv(order MemoryLayout, transA Transpose, m int, n int, kl int, ku int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dgbmv(order MemoryLayout, transA Transpose, m int, n int, kl int, ku int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_cgbmv(order MemoryLayout, transA Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zgbmv(order MemoryLayout, transA Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_ssbmv(order MemoryLayout, uplo Uplo, n int, k int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dsbmv(order MemoryLayout, uplo Uplo, n int, k int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_stbmv(order MemoryLayout, 
uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtbmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctbmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztbmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_stbsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a &f32, lda int, x &f32, incx int) +fn C.cblas_dtbsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a &f64, lda int, x &f64, incx int) +fn C.cblas_ctbsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_ztbsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, k int, a voidptr, lda int, x voidptr, incx int) +fn C.cblas_stpmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap &f32, x &f32, incx int) +fn C.cblas_dtpmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap &f64, x &f64, incx int) +fn C.cblas_ctpmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_ztpmv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_stpsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap &f32, x &f32, incx int) +fn C.cblas_dtpsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap &f64, x &f64, incx int) +fn C.cblas_ctpsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap voidptr, x voidptr, incx int) +fn C.cblas_ztpsv(order MemoryLayout, uplo Uplo, transA Transpose, diag Diagonal, n int, ap voidptr, x voidptr, incx int) +fn 
C.cblas_ssymv(order MemoryLayout, uplo Uplo, n int, alpha f32, a &f32, lda int, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dsymv(order MemoryLayout, uplo Uplo, n int, alpha f64, a &f64, lda int, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_chemv(order MemoryLayout, uplo Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhemv(order MemoryLayout, uplo Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sspmv(order MemoryLayout, uplo Uplo, n int, alpha f32, ap &f32, x &f32, incx int, beta f32, y &f32, incy int) +fn C.cblas_dspmv(order MemoryLayout, uplo Uplo, n int, alpha f64, ap &f64, x &f64, incx int, beta f64, y &f64, incy int) +fn C.cblas_sspr(order MemoryLayout, uplo Uplo, n int, alpha f32, x &f32, incx int, ap &f32) +fn C.cblas_dspr(order MemoryLayout, uplo Uplo, n int, alpha f64, x &f64, incx int, ap &f64) +fn C.cblas_chpr(order MemoryLayout, uplo Uplo, n int, alpha f32, x voidptr, incx int, a voidptr) +fn C.cblas_zhpr(order MemoryLayout, uplo Uplo, n int, alpha f64, x voidptr, incx int, a voidptr) +fn C.cblas_sspr2(order MemoryLayout, uplo Uplo, n int, alpha f32, x &f32, incx int, y &f32, incy int, a &f32) +fn C.cblas_dspr2(order MemoryLayout, uplo Uplo, n int, alpha f64, x &f64, incx int, y &f64, incy int, a &f64) +fn C.cblas_chpr2(order MemoryLayout, uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) +fn C.cblas_zhpr2(order MemoryLayout, uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, AP voidptr) +fn C.cblas_chbmv(order MemoryLayout, uplo Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhbmv(order MemoryLayout, uplo Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_chpmv(order MemoryLayout, uplo 
Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_zhpmv(order MemoryLayout, uplo Uplo, n int, alpha voidptr, AP voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) +fn C.cblas_sgemm(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dgemm(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn C.cblas_cgemm(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_cgemm3m(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zgemm(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zgemm3m(order MemoryLayout, transA Transpose, transB Transpose, m int, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssymm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dsymm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn C.cblas_csymm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zsymm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssyrk(order MemoryLayout, uplo Uplo, trans Transpose, n int, 
k int, alpha f32, a &f32, lda int, beta f32, c &f32, ldc int) +fn C.cblas_dsyrk(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha f64, a &f64, lda int, beta f64, c &f64, ldc int) +fn C.cblas_csyrk(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zsyrk(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, c voidptr, ldc int) +fn C.cblas_ssyr2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha f32, a &f32, lda int, b &f32, ldb int, beta f32, c &f32, ldc int) +fn C.cblas_dsyr2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha f64, a &f64, lda int, b &f64, ldb int, beta f64, c &f64, ldc int) +fn C.cblas_csyr2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zsyr2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_strmm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) +fn C.cblas_dtrmm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) +fn C.cblas_ctrmm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_ztrmm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_strsm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha f32, a &f32, lda int, b &f32, ldb int) +fn C.cblas_dtrsm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, 
diag Diagonal, m int, n int, alpha f64, a &f64, lda int, b &f64, ldb int) +fn C.cblas_ctrsm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_ztrsm(order MemoryLayout, side Side, uplo Uplo, transA Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int) +fn C.cblas_chemm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_zhemm(order MemoryLayout, side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta voidptr, c voidptr, ldc int) +fn C.cblas_cherk(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha f32, a voidptr, lda int, beta f32, c voidptr, ldc int) +fn C.cblas_zherk(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha f64, a voidptr, lda int, beta f64, c voidptr, ldc int) +fn C.cblas_cher2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f32, c voidptr, ldc int) +fn C.cblas_zher2k(order MemoryLayout, uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, B voidptr, ldb int, beta f64, c voidptr, ldc int) fn C.cblas_xerbla(p int, rout &byte, form &byte, other voidptr) fn C.cblas_saxpby(n int, alpha f32, x &f32, incx int, beta f32, y &f32, incy int) fn C.cblas_daxpby(n int, alpha f64, x &f64, incx int, beta f64, y &f64, incy int) fn C.cblas_caxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) fn C.cblas_zaxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, y voidptr, incy int) -fn C.cblas_somatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, b &f32, cldb int) -fn C.cblas_domatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, 
calpha f64, a &f64, clda int, b &f64, cldb int) -fn C.cblas_comatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, b &f32, cldb int) -fn C.cblas_zomatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, b &f64, cldb int) -fn C.cblas_simatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f32, a &f32, clda int, cldb int) -fn C.cblas_dimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha f64, a &f64, clda int, cldb int) -fn C.cblas_cimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, cldb int) -fn C.cblas_zimatcopy(corder blas64.MemoryLayout, ctrans blas64.Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, cldb int) -fn C.cblas_sgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha f32, a &f32, clda int, cbeta f32, c &f32, cldc int) -fn C.cblas_dgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha f64, a &f64, clda int, cbeta f64, c &f64, cldc int) -fn C.cblas_cgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha &f32, a &f32, clda int, cbeta &f32, c &f32, cldc int) -fn C.cblas_zgeadd(corder blas64.MemoryLayout, crows int, ccols int, calpha &f64, a &f64, clda int, cbeta &f64, c &f64, cldc int) +fn C.cblas_somatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha f32, a &f32, clda int, b &f32, cldb int) +fn C.cblas_domatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha f64, a &f64, clda int, b &f64, cldb int) +fn C.cblas_comatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, b &f32, cldb int) +fn C.cblas_zomatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, b &f64, cldb int) +fn C.cblas_simatcopy(corder MemoryLayout, ctrans Transpose, crows 
int, ccols int, calpha f32, a &f32, clda int, cldb int) +fn C.cblas_dimatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha f64, a &f64, clda int, cldb int) +fn C.cblas_cimatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha &f32, a &f32, clda int, cldb int) +fn C.cblas_zimatcopy(corder MemoryLayout, ctrans Transpose, crows int, ccols int, calpha &f64, a &f64, clda int, cldb int) +fn C.cblas_sgeadd(corder MemoryLayout, crows int, ccols int, calpha f32, a &f32, clda int, cbeta f32, c &f32, cldc int) +fn C.cblas_dgeadd(corder MemoryLayout, crows int, ccols int, calpha f64, a &f64, clda int, cbeta f64, c &f64, cldc int) +fn C.cblas_cgeadd(corder MemoryLayout, crows int, ccols int, calpha &f32, a &f32, clda int, cbeta &f32, c &f32, cldc int) +fn C.cblas_zgeadd(corder MemoryLayout, crows int, ccols int, calpha &f64, a &f64, clda int, cbeta &f64, c &f64, cldc int) // set_num_threads sets the number of threads in OpenBLAS pub fn set_num_threads(n int) { @@ -388,25 +386,25 @@ pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a } @[inline] -pub fn strsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { +pub fn strsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { +pub fn dtrsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn strmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { +pub fn strmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f32, lda int, mut x 
[]f32, incx int) { C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { +pub fn dtrmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } diff --git a/blas/oblas_notd_vsl_blas_cblas.v b/blas/oblas_notd_vsl_blas_cblas.v index 763995224..8552dde50 100644 --- a/blas/oblas_notd_vsl_blas_cblas.v +++ b/blas/oblas_notd_vsl_blas_cblas.v @@ -57,12 +57,12 @@ pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a } @[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { +pub fn dtrsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { blas64.dtrsv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) } @[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag blas64.Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { +pub fn dtrmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { blas64.dtrmv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) } diff --git a/lapack/lapack_common.v b/lapack/lapack_common.v index 14d291ee0..0c4064c2b 100644 --- a/lapack/lapack_common.v +++ b/lapack/lapack_common.v @@ -2,25 +2,24 @@ module lapack import vsl.errors import vsl.blas -import vsl.blas.blas64 -fn C.LAPACKE_dgesv(matrix_layout blas64.MemoryLayout, n int, nrhs int, a &f64, lda int, ipiv &int, b &f64, ldb int) int +fn C.LAPACKE_dgesv(matrix_layout blas.MemoryLayout, n int, nrhs int, a &f64, lda int, ipiv &int, b &f64, ldb int) int -fn C.LAPACKE_dgesvd(matrix_layout blas64.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt 
&f64, ldvt int, superb &f64) int +fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int -fn C.LAPACKE_dgetrf(matrix_layout blas64.MemoryLayout, m int, n int, a &f64, lda int, ipiv &int) int +fn C.LAPACKE_dgetrf(matrix_layout blas.MemoryLayout, m int, n int, a &f64, lda int, ipiv &int) int -fn C.LAPACKE_dgetri(matrix_layout blas64.MemoryLayout, n int, a &f64, lda int, ipiv &int) int +fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipiv &int) int -fn C.LAPACKE_dpotrf(matrix_layout blas64.MemoryLayout, up u32, n int, a &f64, lda int) int +fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, up u32, n int, a &f64, lda int) int -fn C.LAPACKE_dgeev(matrix_layout blas64.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int +fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int -fn C.LAPACKE_dsyev(matrix_layout blas64.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int +fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int -fn C.LAPACKE_dgebal(matrix_layout blas64.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int +fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int -fn C.LAPACKE_dgehrd(matrix_layout blas64.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int +fn C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int // dgesv computes the solution to a real system of linear equations. 
// diff --git a/lapack/lapack_default.c.v b/lapack/lapack_default.c.v index a16459354..783b54980 100644 --- a/lapack/lapack_default.c.v +++ b/lapack/lapack_default.c.v @@ -1,8 +1,8 @@ module lapack -import vsl.blas.blas64 +import vsl.blas -fn C.LAPACKE_dlange(matrix_layout blas64.MemoryLayout, norm &char, m int, n int, a &f64, lda int, work &f64) f64 +fn C.LAPACKE_dlange(matrix_layout blas.MemoryLayout, norm &char, m int, n int, a &f64, lda int, work &f64) f64 pub fn dlange(norm rune, m int, n int, a []f64, lda int, work []f64) f64 { return unsafe { From 5bd805c1edd4a9306dca531d9bb0bff68218b62f Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 03:00:06 -0300 Subject: [PATCH 04/33] Update import statements in lapack module --- lapack/lapack64/dgesv.v | 4 +-- lapack/lapack64/dgetrf.v | 2 +- lapack/lapack64/dgetrs.v | 67 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 lapack/lapack64/dgetrs.v diff --git a/lapack/lapack64/dgesv.v b/lapack/lapack64/dgesv.v index dc8b61bdf..03bbb93d3 100644 --- a/lapack/lapack64/dgesv.v +++ b/lapack/lapack64/dgesv.v @@ -1,7 +1,7 @@ module lapack import math -import vsl.blas.blas64 +import vsl.blas // dgesv computes the solution to a real system of linear equations // @@ -32,7 +32,7 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb if lda < math.max(1, n) { panic(bad_ld_a) } - if ldb < math.max(1, n) { + if ldb < math.max(1, nrhs) { panic(bad_ld_b) } diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 54e383eac..370f7a5e1 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -1,7 +1,7 @@ module lapack import math -import vsl.blas.blas64 +import vsl.blas // dgetrf computes the LU decomposition of an m×n matrix A using partial // pivoting with row interchanges. 
diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v
new file mode 100644
index 000000000..e26b661c8
--- /dev/null
+++ b/lapack/lapack64/dgetrs.v
@@ -0,0 +1,68 @@
+module lapack
+
+import math
+import vsl.blas
+
+// dgetrs solves a system of equations using an LU factorization.
+// The system of equations solved is
+//
+//	A * X = B   if trans == blas.NoTrans
+//	Aᵀ * X = B  if trans == blas.Trans
+//
+// A is a general n×n matrix with stride lda. B is a general matrix of size n×nrhs.
+//
+// On entry b contains the elements of the matrix B. On exit, b contains the
+// elements of X, the solution to the system of equations.
+//
+// a and ipiv contain the LU factorization of A and the permutation indices as
+// computed by Dgetrf. ipiv is zero-indexed.
+pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) {
+	if trans != .no_trans && trans != .trans && trans != .conj_trans {
+		panic(bad_trans)
+	}
+	if n < 0 {
+		panic(n_lt0)
+	}
+	if nrhs < 0 {
+		panic(nrhs_lt0)
+	}
+	if lda < math.max(1, n) {
+		panic(bad_ld_a)
+	}
+	if ldb < math.max(1, nrhs) {
+		panic(bad_ld_b)
+	}
+
+	// Quick return if possible.
+	if n == 0 || nrhs == 0 {
+		return
+	}
+
+	if a.len < (n - 1) * lda + n {
+		panic(short_ab)
+	}
+	if b.len < (n - 1) * ldb + nrhs {
+		panic(short_b)
+	}
+	if ipiv.len != n {
+		panic(bad_len_ipiv)
+	}
+
+	if trans == .no_trans {
+		// Solve A * X = B.
+		dlaswp(nrhs, mut b, ldb, 0, n - 1, ipiv, 1)
+		// Solve L * X = B, overwriting B with X.
+		blas.dtrsm(.left, .lower, .no_trans, .unit, n, nrhs, 1, mut a, lda, mut b, ldb)
+		// Solve U * X = B, overwriting B with X.
+		blas.dtrsm(.left, .upper, .no_trans, .non_unit, n, nrhs, 1, mut a, lda, mut b,
+			ldb)
+		return
+	}
+
+	// Solve Aᵀ * X = B.
+	// Solve Uᵀ * X = B, overwriting B with X.
+	blas.dtrsm(.left, .upper, .trans, .non_unit, n, nrhs, 1, mut a, lda, mut b, ldb)
+	// Solve Lᵀ * X = B, overwriting B with X.
+ blas.dtrsm(.left, .lower, .trans, .unit, n, nrhs, 1, mut a, lda, mut b, ldb) + dlaswp(nrhs, b, ldb, 0, n - 1, ipiv, -1) +} From f33a31b91b99c8a8fee9c8ec73ae81bfddcab57b Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 03:00:59 -0300 Subject: [PATCH 05/33] Update LAPACKE backend status in README.md --- lapack/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lapack/README.md b/lapack/README.md index fccedfdfd..578ae936f 100644 --- a/lapack/README.md +++ b/lapack/README.md @@ -2,10 +2,10 @@ This package implements Linear Algebra routines in V. -| Backend | Description | Status | Compilation Flags | -| -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------ | ------------------- | -| BLAS | Pure V implementation | Stable | `NONE` | -| LAPACKE | LAPACKE is a C interface to LAPACK. It is a standard part of the LAPACK distribution. Check the section [LAPACKE Backend](#lapacke-backend) for more information. | Stable | `-d vsl_lapack_lapacke` | +| Backend | Description | Status | Compilation Flags | +| ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ | ----------------------- | +| BLAS | Pure V implementation | WIP | `NONE` | +| LAPACKE | LAPACKE is a C interface to LAPACK. It is a standard part of the LAPACK distribution. Check the section [LAPACKE Backend](#lapacke-backend) for more information. | Stable | `-d vsl_lapack_lapacke` | Therefore, its routines are a little more _lower level_ than the ones in the package `vsl.la`. 
From 3a0af32cf74825daa7ea51636cfc99cf9334b1a8 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 03:01:28 -0300 Subject: [PATCH 06/33] Refactor LAPACKE backend for improved performance --- lapack/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lapack/README.md b/lapack/README.md index 578ae936f..bbd4c7a82 100644 --- a/lapack/README.md +++ b/lapack/README.md @@ -11,7 +11,8 @@ Therefore, its routines are a little more _lower level_ than the ones in the pac ## LAPACKE Backend -We provide a backend for the LAPACKE library. This backend is probably the fastest one for all platforms +We provide a backend for the LAPACKE library. This backend is probably +the fastest one for all platforms but it requires the installation of the LAPACKE library. Use the compilation flag `-d vsl_lapack_lapacke` to use the LAPACKE backend From 11ed0751cac53c8a5abf9344de5511cf42986c81 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 03:02:41 -0300 Subject: [PATCH 07/33] Refactor README files for BLAS and LAPACK backends --- blas/README.md | 3 ++- lapack/README.md | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/blas/README.md b/blas/README.md index a21bdf992..1413acca3 100644 --- a/blas/README.md +++ b/blas/README.md @@ -11,7 +11,8 @@ Therefore, its routines are a little more _lower level_ than the ones in the pac ## OpenBLAS Backend -We provide a backend for the OpenBLAS library. This backend is probably the fastest one for all platforms +We provide a backend for the OpenBLAS library. This backend is probably +the fastest one for all platforms but it requires the installation of the OpenBLAS library. Use the compilation flag `-d vsl_blas_cblas` to use the OpenBLAS backend diff --git a/lapack/README.md b/lapack/README.md index bbd4c7a82..2ee59d40f 100644 --- a/lapack/README.md +++ b/lapack/README.md @@ -2,10 +2,11 @@ This package implements Linear Algebra routines in V. 
-| Backend | Description | Status | Compilation Flags |
-| ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ | ----------------------- |
-| BLAS | Pure V implementation | WIP | `NONE` |
-| LAPACKE | LAPACKE is a C interface to LAPACK. It is a standard part of the LAPACK distribution. Check the section [LAPACKE Backend](#lapacke-backend) for more information. | Stable | `-d vsl_lapack_lapacke` |
+| Backend | Description | Status | Compilation Flags |
+| ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ | ----------------------- |
+| BLAS | Pure V implementation | WIP | `NONE` |
+| LAPACKE | LAPACKE is a C interface to LAPACK. It is a standard part of the LAPACK distribution. Check the section [LAPACKE Backend](#lapacke-backend) for more information. | Stable | `-d vsl_lapack_lapacke` |
+
 
 Therefore, its routines are a little more _lower level_ than the ones in the package `vsl.la`.
From c3d1e2ef5110178f3550b5507af7c087636d8ff9 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 7 Jan 2024 03:06:41 -0300 Subject: [PATCH 08/33] Refactor conversion functions to be public --- blas/conversions.v | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blas/conversions.v b/blas/conversions.v index e917e4f58..553bcf16d 100644 --- a/blas/conversions.v +++ b/blas/conversions.v @@ -32,12 +32,12 @@ pub fn c_uplo(up bool) Uplo { } // l_uplo is a helper function to convert bool to Uplo -fn l_uplo(up bool) u8 { +pub fn l_uplo(up bool) u8 { return if up { `U` } else { `L` } } // job_vlr is a helper function to convert bool to char -fn job_vlr(do_calc bool) rune { +pub fn job_vlr(do_calc bool) rune { return if do_calc { `V` } else { `N` } } From 0832f6d71d9c3afe64e40cc49b62e80904fabe6f Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 24 Mar 2024 02:06:31 -0300 Subject: [PATCH 09/33] Refactor dgetrf function to use blocked algorithm --- lapack/lapack64/dgetrf.v | 28 ++++ lapack/lapack64/ilaenv.v | 272 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 300 insertions(+) create mode 100644 lapack/lapack64/ilaenv.v diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 370f7a5e1..9a1a10a25 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -48,4 +48,32 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { if ipiv.len < mn { panic(bad_len_ipiv) } + + nb := ilaenv(1, 'DGETRF', ' ', m, n, -1, -1) + + if nb <= 1 || nb >= mn { + // use the unblocked algorithm. + return dgetf2(m, n, mut a, lda, ipiv) + } + + for j := 0; j < mn; j += nb { + jb := math.min(mn - j, nb) + + // factor diagonal and subdiagonal blocks and test for exact singularity. + dgetf2(m - j, jb, mut a[j * lda + j..], lda, ipiv[j..j + jb]) + + for i := j; i <= math.min(m - 1, j + jb - 1); i++ { + ipiv[i] += j + } + + // apply interchanges to columns 1..j-1. 
+		dlaswp(j, mut a, lda, j, j + jb - 1, ipiv[..j + jb], 1)
+
+		if j + jb < n {
+			// apply interchanges to columns j+jb..n-1 (trailing submatrix).
+			mut slice := unsafe { a[j + jb..] }
+			dlaswp(n - j - jb, mut slice, lda, j, j + jb - 1, ipiv[..j + jb], 1)
+			// TODO: blocked update incomplete — missing dtrsm (block row of U) and dgemm (trailing submatrix update).
+		}
+	}
 }
diff --git a/lapack/lapack64/ilaenv.v b/lapack/lapack64/ilaenv.v
new file mode 100644
index 000000000..cd075a0b8
--- /dev/null
+++ b/lapack/lapack64/ilaenv.v
@@ -0,0 +1,272 @@
+module lapack64
+
+// ilaenv returns algorithm tuning parameters for the algorithm given by the
+// input string. ispec specifies the parameter to return:
+//
+//	1: The optimal block size for a blocked algorithm.
+//	2: The minimum block size for a blocked algorithm.
+//	3: The block size of unprocessed data at which a blocked algorithm should
+//	   crossover to an unblocked version.
+//	4: The number of shifts.
+//	5: The minimum column dimension for blocking to be used.
+//	6: The crossover point for SVD (to use QR factorization or not).
+//	7: The number of processors.
+//	8: The crossover point for multi-shift in QR and QZ methods for non-symmetric eigenvalue problems.
+//	9: Maximum size of the subproblems in divide-and-conquer algorithms.
+//	10: ieee infinity and NaN arithmetic can be trusted not to trap.
+//	11: ieee infinity arithmetic can be trusted not to trap.
+//	12...16: parameters for Dhseqr and related functions. See Iparmq for more
+//	information.
+//
+// ilaenv is an internal routine. It is exported for testing purposes.
+fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) int {
+	// TODO(btracey): Replace this with a constant lookup? A list of constants?
+ sname := name[0] == `S` || name[0] == `D` + cname := name[0] == `C` || name[0] == `Z` + if !sname && !cname { + panic(bad_name) + } + + c2 := name[1..3] + c3 := name[3..6] + c4 := c3[1..3] + + match ispec { + 1 { + match c2 { + 'GE' { + match c3 { + 'TRF', 'TRI' { + return 64 + } + 'QRF', 'RQF', 'LQF', 'QLF', 'HRD', 'BRD' { + return 32 + } + else { + panic(bad_name) + } + } + } + 'PO' { + match c3 { + 'TRF' { + return 64 + } + else { + panic(bad_name) + } + } + } + 'SY', 'HE' { + match c3 { + 'TRF' { + return 64 + } + 'TRD', 'GST' { + return 32 + } + else { + panic(bad_name) + } + } + } + 'OR', 'UN' { + match c3[0] { + 'G', 'M' { + match c3[1..] { + 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { + return 32 + } + else { + panic(bad_name) + } + } + } + else { + panic(bad_name) + } + } + } + 'GB', 'PB' { + // Assuming n4 and n2 are defined elsewhere in your code + match c3 { + 'TRF' { + // Replace `n4` and `n2` with actual variables + if sname { + // if n4 <= 64 { + // return 1 + // } + return 32 + } + // if n4 <= 64 { + // return 1 + // } + return 32 + } + else { + panic(bad_name) + } + } + } + 'PT', 'TR', 'LA' { + // Additional cases as per your original logic + } + 'ST' { + if sname && c3 == 'EBZ' { + return 1 + } + panic(bad_name) + } + else { + panic(bad_name) + } + } + } + 2 { + match c2 { + 'GE' { + match c3 { + 'QRF', 'RQF', 'LQF', 'QLF', 'HRD', 'BRD', 'TRI' { + if sname { + return 2 + } + return 2 + } + else { + panic(bad_name) + } + } + } + 'SY' { + match c3 { + 'TRF' { + if sname { + return 8 + } + return 8 + } + 'TRD' { + if sname { + return 2 + } + panic(bad_name) + } + else { + panic(bad_name) + } + } + } + 'HE' { + if c3 == 'TRD' { + return 2 + } + panic(bad_name) + } + 'OR', 'UN' { + if !sname { + panic(bad_name) + } + match c3[0] { + 'G', 'M' { + match c4 { + 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { + return 2 + } + else { + panic(bad_name) + } + } + } + else { + panic(bad_name) + } + } + } + else { + panic(bad_name) + } + } + } + 3 { + match c2 { + 
'GE' { + match c3 { + 'QRF', 'RQF', 'LQF', 'QLF', 'HRD', 'BRD' { + if sname { + return 128 + } + return 128 + } + else { + panic(bad_name) + } + } + } + 'SY', 'HE' { + if c3 == 'TRD' { + return 32 + } + panic(bad_name) + } + 'OR', 'UN' { + match c3[0] { + 'G' { + match c4 { + 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { + return 128 + } + else { + panic(bad_name) + } + } + } + else { + panic(bad_name) + } + } + } + else { + panic(bad_name) + } + } + } + 4 { + // Used by xHSEQR + return 6 + } + 5 { + // Not used + return 2 + } + 6 { + // Used by xGELSS and xGESVD + // Assuming n1 and n2 are defined elsewhere in your code + // Replace `min(n1, n2)` with actual min calculation or function + return int(f64(min(n1, n2)) * 1.6) + } + 7 { + // Not used + return 1 + } + 8 { + // Used by xHSEQR + return 50 + } + 9 { + // Used by xGELSD and xGESDD + return 25 + } + 10, 11 { + // Go guarantees ieee + return 1 + } + 12, 13, 14, 15, 16 { + // dhseqr and related functions for eigenvalue problems. + return iparmq(ispec, name, opts, n1, n2, n3, n4) + } + else { + panic(bad_ispec) + } + } + return 0 +} From 88776da450e83f4393e26e5f41a69c2f222f1042 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 28 Apr 2024 02:35:53 -0300 Subject: [PATCH 10/33] Refactor dgetrf function to use blocked algorithm --- ...ck_lapacke copy.v => cflags_d_vsl_lapack_lapacke.v} | 0 lapack/lapack64/dgetrf.v | 10 +++++++++- ml/knn.v | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) rename lapack/{cflags_d_vsl_lapack_lapacke copy.v => cflags_d_vsl_lapack_lapacke.v} (100%) diff --git a/lapack/cflags_d_vsl_lapack_lapacke copy.v b/lapack/cflags_d_vsl_lapack_lapacke.v similarity index 100% rename from lapack/cflags_d_vsl_lapack_lapacke copy.v rename to lapack/cflags_d_vsl_lapack_lapacke.v diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 9a1a10a25..89aee7a63 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -73,7 +73,15 @@ pub fn dgetrf(m int, n int, mut a 
[]f64, lda int, ipiv []int) { // apply interchanges to columns 1..j-1. mut slice := unsafe { a[j + jb..] } dlaswp(j, mut slice, lda, j, j + jb, ipiv[..j + jb], 1) - // + + blas.dtstrf(.left, .lower, .notrans, .unit, jb, n - j - jb, 1, a[j * lda + j..], + lda, a[j * lda + j + jb..], lda) + + if j + jb < m { + blas.dgemm(.notrans, .notrans, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], + lda, a[j * lda + j + jb..], lda, 1, a[(j + jb) * lda + j + jb..], + lda) + } } } } diff --git a/ml/knn.v b/ml/knn.v index 407cbfec0..c84db9397 100644 --- a/ml/knn.v +++ b/ml/knn.v @@ -112,7 +112,7 @@ pub struct PredictConfig { pub: max_iter int k int - to_pred []f64 + to_pred []f64 } // predict will find the `k` points nearest to the specified `to_pred`. From b84f2ba3e49c2d182114c44323094c0898c2c6ca Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 28 Apr 2024 02:48:28 -0300 Subject: [PATCH 11/33] Refactor dgetrf function to use blocked algorithm and fix variable naming --- lapack/lapack64/dgetrf.v | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 89aee7a63..0bacc4a85 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -71,16 +71,17 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { if j + jb < n { // apply interchanges to columns 1..j-1. - mut slice := unsafe { a[j + jb..] } - dlaswp(j, mut slice, lda, j, j + jb, ipiv[..j + jb], 1) + mut slice1 := unsafe { a[j + jb..] } + dlaswp(j, mut slice1, lda, j, j + jb, ipiv[..j + jb], 1) + mut slice2 := unsafe { a[j * lda + j + jb..] } blas.dtstrf(.left, .lower, .notrans, .unit, jb, n - j - jb, 1, a[j * lda + j..], - lda, a[j * lda + j + jb..], lda) + lda, mut slice2, lda) if j + jb < m { + mut slice3 := unsafe { a[(j + jb) * lda + j + jb..] 
} blas.dgemm(.notrans, .notrans, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], - lda, a[j * lda + j + jb..], lda, 1, a[(j + jb) * lda + j + jb..], - lda) + lda, a[j * lda + j + jb..], lda, 1, mut slice3, lda) } } } From ab34e62cbd8680190154379bf8875b6c72180497 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 28 Apr 2024 03:11:21 -0300 Subject: [PATCH 12/33] Refactor LAPACK functions to use row-major memory layout --- ..._common.v => lapack_d_vsl_lapack_common.v} | 0 lapack/lapack_notd_vsl_lapack_common.v | 184 ++++++++++++++++++ 2 files changed, 184 insertions(+) rename lapack/{lapack_common.v => lapack_d_vsl_lapack_common.v} (100%) create mode 100644 lapack/lapack_notd_vsl_lapack_common.v diff --git a/lapack/lapack_common.v b/lapack/lapack_d_vsl_lapack_common.v similarity index 100% rename from lapack/lapack_common.v rename to lapack/lapack_d_vsl_lapack_common.v diff --git a/lapack/lapack_notd_vsl_lapack_common.v b/lapack/lapack_notd_vsl_lapack_common.v new file mode 100644 index 000000000..3635ed6cd --- /dev/null +++ b/lapack/lapack_notd_vsl_lapack_common.v @@ -0,0 +1,184 @@ +module lapack + +import vsl.errors +import vsl.blas +import vsl.lapack.lapack64 + +fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int + +fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipiv &int) int + +fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, up u32, n int, a &f64, lda int) int + +fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int + +fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int + +fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int + +fn 
C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int + +// dgesv computes the solution to a real system of linear equations. +// +// See: http://www.netlib.org/lapack/explore-html/d8/d72/dgesv_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-gesv +// +// The system is: +// +// A * X = B, +// +// where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +// +// The LU decomposition with partial pivoting and row interchanges is +// used to factor A as +// +// A = P * L * U, +// +// where P is a permutation matrix, L is unit lower triangular, and U is +// upper triangular. The factored form of A is then used to solve the +// system of equations A * X = B. +// +// NOTE: matrix 'a' will be modified +@[inline] +pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { + lapack64.dgesv(n, nrhs, mut a, lda, ipiv, mut b, ldb) +} + +// dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. +// +// See: http://www.netlib.org/lapack/explore-html/d8/d2d/dgesvd_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-gesvd +// +// The SVD is written +// +// A = U * SIGMA * transpose(V) +// +// where SIGMA is an M-by-N matrix which is zero except for its +// min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +// V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +// are the singular values of A; they are real and non-negative, and +// are returned in descending order. The first min(m,n) columns of +// U and V are the left and right singular vectors of A. +// +// Note that the routine returns V**T, not V. 
+// +// NOTE: matrix 'a' will be modified +pub fn dgesvd(jobu &char, jobvt &char, m int, n int, a []f64, lda int, s []f64, u []f64, ldu int, vt []f64, ldvt int, superb []f64) { + info := C.LAPACKE_dgesvd(.row_major, jobu, jobvt, m, n, &a[0], lda, &s[0], &u[0], + ldu, &vt[0], ldvt, &superb[0]) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) + } +} + +// dgetrf computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. +// +// See: http://www.netlib.org/lapack/explore-html/d3/d6a/dgetrf_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-getrf +// +// The factorization has the form +// A = P * L * U +// where P is a permutation matrix, L is lower triangular with unit +// diagonal elements (lower trapezoidal if m > n), and U is upper +// triangular (upper trapezoidal if m < n). +// +// This is the right-looking Level 3 BLAS version of the algorithm. +// +// NOTE: (1) matrix 'a' will be modified +// (2) ipiv indices are 1-based (i.e. Fortran) +pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { + lapack64.dgetrf(m, n, mut a, lda, ipiv) +} + +// dgetri computes the inverse of a matrix using the LU factorization computed by DGETRF. +// +// See: http://www.netlib.org/lapack/explore-html/df/da4/dgetri_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-getri +// +// This method inverts U and then computes inv(A) by solving the system +// inv(A)*L = inv(U) for inv(A). +pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { + unsafe { + info := C.LAPACKE_dgetri(.row_major, n, &a[0], lda, &ipiv[0]) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) + } + } +} + +// dpotrf computes the Cholesky factorization of a real symmetric positive definite matrix A. 
+// +// See: http://www.netlib.org/lapack/explore-html/d0/d8a/dpotrf_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-potrf +// +// The factorization has the form +// +// A = U**T * U, if UPLO = 'U' +// +// or +// +// A = L * L**T, if UPLO = 'L' +// +// where U is an upper triangular matrix and L is lower triangular. +// +// This is the block version of the algorithm, calling Level 3 BLAS. +pub fn dpotrf(up bool, n int, mut a []f64, lda int) { + unsafe { + info := C.LAPACKE_dpotrf(.row_major, blas.l_uplo(up), n, &a[0], lda) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) + } + } +} + +// dgeev computes for an N-by-N real nonsymmetric matrix A, the +// eigenvalues and, optionally, the left and/or right eigenvectors. +// +// See: http://www.netlib.org/lapack/explore-html/d9/d28/dgeev_8f.html +// +// See: https://software.intel.com/en-us/mkl-developer-reference-c-geev +// +// See: https://www.nag.co.uk/numeric/fl/nagdoc_fl26/html/f08/f08naf.html +// +// The right eigenvector v(j) of A satisfies +// +// A * v(j) = lambda(j) * v(j) +// +// where lambda(j) is its eigenvalue. +// +// The left eigenvector u(j) of A satisfies +// +// u(j)**H * A = lambda(j) * u(j)**H +// +// where u(j)**H denotes the conjugate-transpose of u(j). +// +// The computed eigenvectors are normalized to have Euclidean norm +// equal to 1 and largest component real. 
+pub fn dgeev(calc_vl bool, calc_vr bool, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { + mut vvl := 0.0 + mut vvr := 0.0 + mut ldvl := ldvl_ + mut ldvr := ldvr_ + if calc_vl { + vvl = vl[0] + } else { + ldvl = 1 + } + if calc_vr { + vvr = vr[0] + } else { + ldvr = 1 + } + unsafe { + info := C.LAPACKE_dgeev(.row_major, &char(blas.job_vlr(calc_vl).str().str), &char(blas.job_vlr(calc_vr).str().str), + n, &a[0], lda, &wr[0], &wi[0], &vvl, ldvl, &vvr, ldvr) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) + } + } +} From 14d3f672380826f5232ec2d5d2aab5f0839f271a Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 28 Apr 2024 03:14:07 -0300 Subject: [PATCH 13/33] Refactor LAPACK module to use lapack64 module --- lapack/lapack64/dgesv.v | 2 +- lapack/lapack64/dgetrf.v | 2 +- lapack/lapack64/dgetrs.v | 2 +- lapack/lapack64/errors.v | 2 +- lapack/lapack64/ilaenv.v | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lapack/lapack64/dgesv.v b/lapack/lapack64/dgesv.v index 03bbb93d3..d0d7f90a5 100644 --- a/lapack/lapack64/dgesv.v +++ b/lapack/lapack64/dgesv.v @@ -1,4 +1,4 @@ -module lapack +module lapack64 import math import vsl.blas diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 0bacc4a85..e1b6553fc 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -1,4 +1,4 @@ -module lapack +module lapack64 import math import vsl.blas diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v index e26b661c8..71d219723 100644 --- a/lapack/lapack64/dgetrs.v +++ b/lapack/lapack64/dgetrs.v @@ -1,4 +1,4 @@ -module lapack +module lapack64 import math import vsl.blas diff --git a/lapack/lapack64/errors.v b/lapack/lapack64/errors.v index d17defc08..91a96995f 100644 --- a/lapack/lapack64/errors.v +++ b/lapack/lapack64/errors.v @@ -1,4 +1,4 @@ -module lapack +module lapack64 // This list is duplicated in netlib/lapack/netlib. Keep in sync. 
diff --git a/lapack/lapack64/ilaenv.v b/lapack/lapack64/ilaenv.v index cd075a0b8..52d2e4e7d 100644 --- a/lapack/lapack64/ilaenv.v +++ b/lapack/lapack64/ilaenv.v @@ -72,7 +72,7 @@ fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) i } 'OR', 'UN' { match c3[0] { - 'G', 'M' { + `G`, `M` { match c3[1..] { 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { return 32 @@ -167,7 +167,7 @@ fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) i panic(bad_name) } match c3[0] { - 'G', 'M' { + `G`, `M` { match c4 { 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { return 2 @@ -210,7 +210,7 @@ fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) i } 'OR', 'UN' { match c3[0] { - 'G' { + `G` { match c4 { 'QR', 'RQ', 'LQ', 'QL', 'HR', 'TR', 'BR' { return 128 From 02da1671ed19780d29c5e1f80441091d852687e9 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 28 Apr 2024 03:18:30 -0300 Subject: [PATCH 14/33] Refactor dgetrs function to use f64 instead of float64 for array types --- lapack/lapack64/dgetrs.v | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v index 71d219723..0458cc1e2 100644 --- a/lapack/lapack64/dgetrs.v +++ b/lapack/lapack64/dgetrs.v @@ -16,7 +16,7 @@ import vsl.blas // // a and ipiv contain the LU factorization of A and the permutation indices as // computed by Dgetrf. ipiv is zero-indexed. 
-pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []float64, lda int, ipiv []int, mut b []float64, ldb int) { +pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { if trans != .no_trans && trans != .trans && trans != .conj_trans { panic(bad_trans) } From e43bfa574a4f026c01d8106d9824a43c5e4e1eae Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 01:41:06 -0300 Subject: [PATCH 15/33] refactor: Update create_image_2d function to use local variable for format --- blas/oblas_d_vsl_blas_cblas.v | 648 +++++++++++++++++- la/matrix_ops.v | 2 +- lapack/conversions.v | 70 ++ lapack/lapack64/conversions.v | 199 ++++++ lapack/lapack64/dgebal.v | 33 + lapack/lapack64/dgeev.v | 39 ++ lapack/lapack64/dgehrd.v | 35 + lapack/lapack64/dgesv.v | 4 +- lapack/lapack64/dgesvd.v | 42 ++ lapack/lapack64/dgetrf.v | 11 +- lapack/lapack64/dgetri.v | 31 + lapack/lapack64/dgetrs.v | 9 +- lapack/lapack64/dpotrf.v | 33 + lapack/lapack64/dsyev.v | 38 + lapack/lapack64/ilaenv.v | 4 +- ...common.v => lapack_d_vsl_lapack_lapacke.v} | 30 +- ...mon.v => lapack_notd_vsl_lapack_lapacke.v} | 56 +- 17 files changed, 1201 insertions(+), 83 deletions(-) create mode 100644 lapack/conversions.v create mode 100644 lapack/lapack64/conversions.v create mode 100644 lapack/lapack64/dgebal.v create mode 100644 lapack/lapack64/dgeev.v create mode 100644 lapack/lapack64/dgehrd.v create mode 100644 lapack/lapack64/dgesvd.v create mode 100644 lapack/lapack64/dgetri.v create mode 100644 lapack/lapack64/dpotrf.v create mode 100644 lapack/lapack64/dsyev.v rename lapack/{lapack_d_vsl_lapack_common.v => lapack_d_vsl_lapack_lapacke.v} (78%) rename lapack/{lapack_notd_vsl_lapack_common.v => lapack_notd_vsl_lapack_lapacke.v} (70%) diff --git a/blas/oblas_d_vsl_blas_cblas.v b/blas/oblas_d_vsl_blas_cblas.v index 29be3be06..777038f3d 100644 --- a/blas/oblas_d_vsl_blas_cblas.v +++ b/blas/oblas_d_vsl_blas_cblas.v @@ -221,6 +221,16 @@ pub fn 
dasum(n int, x []f64, incx int) f64 { return C.cblas_dasum(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn scasum(n int, x voidptr, incx int) f32 { + return C.cblas_scasum(n, x, incx) +} + +@[inline] +pub fn dzasum(n int, x voidptr, incx int) f64 { + return C.cblas_dzasum(n, x, incx) +} + @[inline] pub fn ssum(n int, x []f32, incx int) f32 { return C.cblas_ssum(n, unsafe { &x[0] }, incx) @@ -241,6 +251,16 @@ pub fn dnrm2(n int, x []f64, incx int) f64 { return C.cblas_dnrm2(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn scnrm2(n int, x voidptr, incx int) f32 { + return C.cblas_scnrm2(n, x, incx) +} + +@[inline] +pub fn dznrm2(n int, x voidptr, incx int) f64 { + return C.cblas_dznrm2(n, x, incx) +} + @[inline] pub fn isamax(n int, x []f32, incx int) int { return C.cblas_isamax(n, unsafe { &x[0] }, incx) @@ -251,16 +271,36 @@ pub fn idamax(n int, x []f64, incx int) int { return C.cblas_idamax(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn icamax(n int, x voidptr, incx int) int { + return C.cblas_icamax(n, x, incx) +} + +@[inline] +pub fn izamax(n int, x voidptr, incx int) int { + return C.cblas_izamax(n, x, incx) +} + @[inline] pub fn isamin(n int, x []f32, incx int) int { return C.cblas_isamin(n, unsafe { &x[0] }, incx) } @[inline] -pub fn idamin(n int, x &f64, incx int) int { +pub fn idamin(n int, x []f64, incx int) int { return C.cblas_idamin(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn icamin(n int, x voidptr, incx int) int { + return C.cblas_icamin(n, x, incx) +} + +@[inline] +pub fn izamin(n int, x voidptr, incx int) int { + return C.cblas_izamin(n, x, incx) +} + @[inline] pub fn ismax(n int, x []f32, incx int) int { return C.cblas_ismax(n, unsafe { &x[0] }, incx) @@ -271,6 +311,16 @@ pub fn idmax(n int, x []f64, incx int) int { return C.cblas_idmax(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn icmax(n int, x voidptr, incx int) int { + return C.cblas_icmax(n, x, incx) +} + +@[inline] +pub fn izmax(n int, x voidptr, incx int) int { + return 
C.cblas_izmax(n, x, incx) +} + @[inline] pub fn ismin(n int, x []f32, incx int) int { return C.cblas_ismin(n, unsafe { &x[0] }, incx) @@ -281,6 +331,16 @@ pub fn idmin(n int, x []f64, incx int) int { return C.cblas_idmin(n, unsafe { &x[0] }, incx) } +@[inline] +pub fn icmin(n int, x voidptr, incx int) int { + return C.cblas_icmin(n, x, incx) +} + +@[inline] +pub fn izmin(n int, x voidptr, incx int) int { + return C.cblas_izmin(n, x, incx) +} + @[inline] pub fn saxpy(n int, alpha f32, x []f32, incx int, mut y []f32, incy int) { C.cblas_saxpy(n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) @@ -292,15 +352,35 @@ pub fn daxpy(n int, alpha f64, x []f64, incx int, mut y []f64, incy int) { } @[inline] -pub fn scopy(n int, mut x []f32, incx int, mut y []f32, incy int) { +pub fn caxpy(n int, alpha voidptr, x voidptr, incx int, mut y voidptr, incy int) { + C.cblas_caxpy(n, alpha, x, incx, y, incy) +} + +@[inline] +pub fn zaxpy(n int, alpha voidptr, x voidptr, incx int, mut y voidptr, incy int) { + C.cblas_zaxpy(n, alpha, x, incx, y, incy) +} + +@[inline] +pub fn scopy(n int, x []f32, incx int, mut y []f32, incy int) { C.cblas_scopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) } @[inline] -pub fn dcopy(n int, mut x []f64, incx int, mut y []f64, incy int) { +pub fn dcopy(n int, x []f64, incx int, mut y []f64, incy int) { C.cblas_dcopy(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) } +@[inline] +pub fn ccopy(n int, x voidptr, incx int, mut y voidptr, incy int) { + C.cblas_ccopy(n, x, incx, y, incy) +} + +@[inline] +pub fn zcopy(n int, x voidptr, incx int, mut y voidptr, incy int) { + C.cblas_zcopy(n, x, incx, y, incy) +} + @[inline] pub fn sswap(n int, mut x []f32, incx int, mut y []f32, incy int) { C.cblas_sswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) @@ -311,6 +391,16 @@ pub fn dswap(n int, mut x []f64, incx int, mut y []f64, incy int) { C.cblas_dswap(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy) } +@[inline] +pub fn cswap(n int, x voidptr, 
incx int, y voidptr, incy int) { + C.cblas_cswap(n, x, incx, y, incy) +} + +@[inline] +pub fn zswap(n int, x voidptr, incx int, y voidptr, incy int) { + C.cblas_zswap(n, x, incx, y, incy) +} + @[inline] pub fn srot(n int, mut x []f32, incx int, mut y []f32, incy int, c f32, s f32) { C.cblas_srot(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, c, s) @@ -332,22 +422,22 @@ pub fn drotg(a f64, b f64, c f64, s f64) { } @[inline] -pub fn srotm(n int, x []f32, incx int, y []f32, incy int, p []f32) { +pub fn srotm(n int, mut x []f32, incx int, mut y []f32, incy int, p []f32) { C.cblas_srotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) } @[inline] -pub fn drotm(n int, x []f64, incx int, y []f64, incy int, p []f64) { +pub fn drotm(n int, mut x []f64, incx int, mut y []f64, incy int, p []f64) { C.cblas_drotm(n, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &p[0] }) } @[inline] -pub fn srotmg(d1 f32, d2 f32, b1 f32, b2 f32, p []f32) { +pub fn srotmg(d1 f32, d2 f32, b1 f32, b2 f32, mut p []f32) { C.cblas_srotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) } @[inline] -pub fn drotmg(d1 f64, d2 f64, b1 f64, b2 f32, p []f64) { +pub fn drotmg(d1 f64, d2 f64, b1 f64, b2 f32, mut p []f64) { C.cblas_drotmg(&d1, &d2, &b1, b2, unsafe { &p[0] }) } @@ -361,6 +451,26 @@ pub fn dscal(n int, alpha f64, mut x []f64, incx int) { C.cblas_dscal(n, alpha, unsafe { &x[0] }, incx) } +@[inline] +pub fn cscal(n int, alpha voidptr, mut x voidptr, incx int) { + C.cblas_cscal(n, alpha, x, incx) +} + +@[inline] +pub fn zscal(n int, alpha voidptr, mut x voidptr, incx int) { + C.cblas_zscal(n, alpha, x, incx) +} + +@[inline] +pub fn csscal(n int, alpha f32, mut x voidptr, incx int) { + C.cblas_csscal(n, alpha, x, incx) +} + +@[inline] +pub fn zdscal(n int, alpha f64, mut x voidptr, incx int) { + C.cblas_zdscal(n, alpha, x, incx) +} + @[inline] pub fn sgemv(trans bool, m int, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { 
C.cblas_sgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, @@ -373,6 +483,16 @@ pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, inc incx, beta, unsafe { &y[0] }, incy) } +@[inline] +pub fn cgemv(trans bool, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_cgemv(.row_major, c_trans(trans), m, n, alpha, a, lda, x, incx, beta, y, incy) +} + +@[inline] +pub fn zgemv(trans bool, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zgemv(.row_major, c_trans(trans), m, n, alpha, a, lda, x, incx, beta, y, incy) +} + @[inline] pub fn sger(m int, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { C.cblas_sger(.row_major, m, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, @@ -386,29 +506,69 @@ pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a } @[inline] -pub fn strsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, +pub fn cgeru(m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_cgeru(.row_major, m, n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] +pub fn cgerc(m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_cgerc(.row_major, m, n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] +pub fn zgeru(m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_zgeru(.row_major, m, n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] +pub fn zgerc(m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_zgerc(.row_major, m, n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] 
+pub fn strsv(uplo bool, trans bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, +pub fn dtrsv(uplo bool, trans bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn strmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, +pub fn ctrsv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +} + +@[inline] +pub fn ztrsv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +} + +@[inline] +pub fn strmv(uplo bool, trans bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } @[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans_a), diag, n, unsafe { &a[0] }, +pub fn dtrmv(uplo bool, trans bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx) } +@[inline] +pub fn ctrmv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx 
int) { + C.cblas_ctrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +} + +@[inline] +pub fn ztrmv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +} + @[inline] pub fn ssyr(uplo bool, n int, alpha f32, x []f32, incx int, mut a []f32, lda int) { C.cblas_ssyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, @@ -421,6 +581,16 @@ pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int lda) } +@[inline] +pub fn cher(uplo bool, n int, alpha f32, x voidptr, incx int, mut a voidptr, lda int) { + C.cblas_cher(.row_major, c_uplo(uplo), n, alpha, x, incx, a, lda) +} + +@[inline] +pub fn zher(uplo bool, n int, alpha f64, x voidptr, incx int, mut a voidptr, lda int) { + C.cblas_zher(.row_major, c_uplo(uplo), n, alpha, x, incx, a, lda) +} + @[inline] pub fn ssyr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { C.cblas_ssyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, @@ -434,9 +604,451 @@ pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, } @[inline] -pub fn sgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut cc []f32, ldc int) { - C.cblas_sgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) +pub fn cher2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_cher2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] +pub fn zher2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_zher2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, a, lda) +} + +@[inline] +pub fn 
sgbmv(trans bool, m int, n int, kl int, ku int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, unsafe { &a[0] }, lda, + unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dgbmv(trans bool, m int, n int, kl int, ku int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, unsafe { &a[0] }, lda, + unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn cgbmv(trans bool, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_cgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, + y, incy) +} + +@[inline] +pub fn zgbmv(trans bool, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, + y, incy) +} + +@[inline] +pub fn ssbmv(uplo bool, n int, k int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_ssbmv(.row_major, c_uplo(uplo), n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dsbmv(uplo bool, n int, k int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dsbmv(.row_major, c_uplo(uplo), n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn stbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_stbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dtbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f64, lda int, 
mut x []f64, incx int) { + C.cblas_dtbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ctbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +} + +@[inline] +pub fn ztbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +} + +@[inline] +pub fn stbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_stbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dtbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, + lda, unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ctbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +} + +@[inline] +pub fn ztbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +} + +@[inline] +pub fn stpmv(uplo bool, trans bool, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { + C.cblas_stpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, + unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dtpmv(uplo bool, trans bool, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + C.cblas_dtpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, + unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ctpmv(uplo bool, trans bool, diag 
Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ctpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +} + +@[inline] +pub fn ztpmv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ztpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +} + +@[inline] +pub fn stpsv(uplo bool, trans bool, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { + C.cblas_stpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, + unsafe { &x[0] }, incx) +} + +@[inline] +pub fn dtpsv(uplo bool, trans bool, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + C.cblas_dtpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, + unsafe { &x[0] }, incx) +} + +@[inline] +pub fn ctpsv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ctpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +} + +@[inline] +pub fn ztpsv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ztpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +} + +@[inline] +pub fn ssymv(uplo bool, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_ssymv(.row_major, c_uplo(uplo), n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dsymv(uplo bool, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dsymv(.row_major, c_uplo(uplo), n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn chemv(uplo bool, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chemv(.row_major, c_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy) +} + +@[inline] +pub fn zhemv(uplo bool, n int, alpha voidptr, a voidptr, lda int, x 
voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhemv(.row_major, c_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy) +} + +@[inline] +pub fn sspmv(uplo bool, n int, alpha f32, ap []f32, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sspmv(.row_major, c_uplo(uplo), n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn dspmv(uplo bool, n int, alpha f64, ap []f64, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dspmv(.row_major, c_uplo(uplo), n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn sspr(uplo bool, n int, alpha f32, x []f32, incx int, mut ap []f32) { + C.cblas_sspr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &ap[0] }) +} + +@[inline] +pub fn dspr(uplo bool, n int, alpha f64, x []f64, incx int, mut ap []f64) { + C.cblas_dspr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &ap[0] }) +} + +@[inline] +pub fn chpr(uplo bool, n int, alpha f32, x voidptr, incx int, mut a voidptr) { + C.cblas_chpr(.row_major, c_uplo(uplo), n, alpha, x, incx, a) +} + +@[inline] +pub fn zhpr(uplo bool, n int, alpha f64, x voidptr, incx int, mut a voidptr) { + C.cblas_zhpr(.row_major, c_uplo(uplo), n, alpha, x, incx, a) +} + +@[inline] +pub fn sspr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32) { + C.cblas_sspr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, + incy, unsafe { &a[0] }) +} + +@[inline] +pub fn dspr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64) { + C.cblas_dspr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, + incy, unsafe { &a[0] }) +} + +@[inline] +pub fn chpr2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut ap voidptr) { + C.cblas_chpr2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, ap) +} 
+ +@[inline] +pub fn zhpr2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut ap voidptr) { + C.cblas_zhpr2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, ap) +} + +@[inline] +pub fn chbmv(uplo bool, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chbmv(.row_major, c_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy) +} + +@[inline] +pub fn zhbmv(uplo bool, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhbmv(.row_major, c_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy) +} + +@[inline] +pub fn chpmv(uplo bool, n int, alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chpmv(.row_major, c_uplo(uplo), n, alpha, ap, x, incx, beta, y, incy) +} + +@[inline] +pub fn zhpmv(uplo bool, n int, alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhpmv(.row_major, c_uplo(uplo), n, alpha, ap, x, incx, beta, y, incy) +} + +@[inline] +pub fn ssyrk(uplo bool, trans bool, n int, k int, alpha f32, a []f32, lda int, beta f32, mut c []f32, ldc int) { + C.cblas_ssyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, + lda, beta, unsafe { &c[0] }, ldc) +} + +@[inline] +pub fn dsyrk(uplo bool, trans bool, n int, k int, alpha f64, a []f64, lda int, beta f64, mut c []f64, ldc int) { + C.cblas_dsyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, + lda, beta, unsafe { &c[0] }, ldc) +} + +@[inline] +pub fn csyrk(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_csyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, + c, ldc) +} + +@[inline] +pub fn zsyrk(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { + 
C.cblas_zsyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, + c, ldc) +} + +@[inline] +pub fn ssyr2k(uplo bool, trans bool, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut c []f32, ldc int) { + C.cblas_ssyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb, beta, unsafe { &c[0] }, ldc) +} + +@[inline] +pub fn dsyr2k(uplo bool, trans bool, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut c []f64, ldc int) { + C.cblas_dsyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb, beta, unsafe { &c[0] }, ldc) +} + +@[inline] +pub fn csyr2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_csyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, + beta, c, ldc) +} + +@[inline] +pub fn zsyr2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_zsyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, + beta, c, ldc) +} + +@[inline] +pub fn strmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_strmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn dtrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_dtrmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn ctrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ctrmm(.row_major, side, c_uplo(uplo), c_trans(trans), 
diag, m, n, alpha, a, + lda, b, ldb) +} + +@[inline] +pub fn ztrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ztrmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, + lda, b, ldb) +} + +@[inline] +pub fn strsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_strsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn dtrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_dtrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, + lda, unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn ctrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ctrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, + lda, b, ldb) +} + +@[inline] +pub fn ztrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ztrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, + lda, b, ldb) +} + +@[inline] +pub fn chemm(side Side, uplo bool, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_chemm(.row_major, side, c_uplo(uplo), m, n, alpha, a, lda, b, ldb, beta, c, + ldc) +} + +@[inline] +pub fn zhemm(side Side, uplo bool, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_zhemm(.row_major, side, c_uplo(uplo), m, n, alpha, a, lda, b, ldb, beta, c, + ldc) +} + +@[inline] +pub fn cherk(uplo bool, trans bool, n int, k int, alpha f32, a voidptr, lda int, beta f32, mut c voidptr, ldc int) { + 
C.cblas_cherk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, + c, ldc) +} + +@[inline] +pub fn zherk(uplo bool, trans bool, n int, k int, alpha f64, a voidptr, lda int, beta f64, mut c voidptr, ldc int) { + C.cblas_zherk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, + c, ldc) +} + +@[inline] +pub fn cher2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f32, mut c voidptr, ldc int) { + C.cblas_cher2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, + beta, c, ldc) +} + +@[inline] +pub fn zher2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f64, mut c voidptr, ldc int) { + C.cblas_zher2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, + beta, c, ldc) +} + +@[inline] +pub fn saxpby(n int, alpha f32, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_saxpby(n, alpha, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn daxpby(n int, alpha f64, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_daxpby(n, alpha, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +} + +@[inline] +pub fn caxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_caxpby(n, alpha, x, incx, beta, y, incy) +} + +@[inline] +pub fn zaxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zaxpby(n, alpha, x, incx, beta, y, incy) +} + +@[inline] +pub fn somatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_somatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, + unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn domatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_domatcopy(order, c_trans(trans), rows, cols, alpha, 
unsafe { &a[0] }, lda, + unsafe { &b[0] }, ldb) +} + +@[inline] +pub fn comatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f32, a &f32, lda int, mut b &f32, ldb int) { + C.cblas_comatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, b, ldb) +} + +@[inline] +pub fn zomatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f64, a &f64, lda int, mut b &f64, ldb int) { + C.cblas_zomatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, b, ldb) +} + +@[inline] +pub fn simatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f32, mut a []f32, lda int, ldb int) { + C.cblas_simatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, + ldb) +} + +@[inline] +pub fn dimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f64, mut a []f64, lda int, ldb int) { + C.cblas_dimatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, + ldb) +} + +@[inline] +pub fn cimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f32, mut a &f32, lda int, ldb int) { + C.cblas_cimatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, ldb) +} + +@[inline] +pub fn zimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f64, mut a &f64, lda int, ldb int) { + C.cblas_zimatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, ldb) +} + +@[inline] +pub fn sgeadd(order MemoryLayout, rows int, cols int, alpha f32, a []f32, lda int, beta f32, mut c []f32, ldc int) { + C.cblas_sgeadd(order, rows, cols, alpha, unsafe { &a[0] }, lda, beta, unsafe { &c[0] }, + ldc) +} + +@[inline] +pub fn dgeadd(order MemoryLayout, rows int, cols int, alpha f64, a []f64, lda int, beta f64, mut c []f64, ldc int) { + C.cblas_dgeadd(order, rows, cols, alpha, unsafe { &a[0] }, lda, beta, unsafe { &c[0] }, + ldc) +} + +@[inline] +pub fn cgeadd(order MemoryLayout, rows int, cols int, alpha &f32, a &f32, lda int, beta &f32, mut c &f32, ldc int) { + C.cblas_cgeadd(order, rows, cols, alpha, a, lda, beta, c, ldc) +} + 
+@[inline] +pub fn zgeadd(order MemoryLayout, rows int, cols int, alpha &f64, a &f64, lda int, beta &f64, mut c &f64, ldc int) { + C.cblas_zgeadd(order, rows, cols, alpha, a, lda, beta, c, ldc) } @[inline] diff --git a/la/matrix_ops.v b/la/matrix_ops.v index 74dc6ea28..a792aeede 100644 --- a/la/matrix_ops.v +++ b/la/matrix_ops.v @@ -90,7 +90,7 @@ pub fn matrix_svd(mut s []f64, mut u Matrix[f64], mut vt Matrix[f64], mut a Matr if copy_a { acpy = a.clone() } - lapack.dgesvd(&char('A'.str), &char('A'.str), a.m, a.n, acpy.data, 1, s, u.data, a.m, + lapack.dgesvd(.svd_all, .svd_all, a.m, a.n, mut acpy.data, 1, s, mut u.data, a.m, mut vt.data, a.n, superb) } diff --git a/lapack/conversions.v b/lapack/conversions.v new file mode 100644 index 000000000..fcb1cb239 --- /dev/null +++ b/lapack/conversions.v @@ -0,0 +1,70 @@ +module lapack + +import vsl.lapack.lapack64 + +// Direct specifies the direction of the multiplication for the Householder matrix. +pub type Direct = lapack64.Direct + +// Sort is the sorting order. +pub type Sort = lapack64.Sort + +// StoreV indicates the storage direction of elementary reflectors. +pub type StoreV = lapack64.StoreV + +// MatrixNorm represents the kind of matrix norm to compute. +pub type MatrixNorm = lapack64.MatrixNorm + +// MatrixType represents the kind of matrix represented in the data. +pub type MatrixType = lapack64.MatrixType + +// Pivot specifies the pivot type for plane rotations. +pub type Pivot = lapack64.Pivot + +// ApplyOrtho specifies which orthogonal matrix is applied in Dormbr. +pub type ApplyOrtho = lapack64.ApplyOrtho + +// GenOrtho specifies which orthogonal matrix is generated in Dorgbr. +pub type GenOrtho = lapack64.GenOrtho + +// SVDJob specifies the singular vector computation type for SVD. +pub type SVDJob = lapack64.SVDJob + +// GSVDJob specifies the singular vector computation type for Generalized SVD. +pub type GSVDJob = lapack64.GSVDJob + +// EVComp specifies how eigenvectors are computed in Dsteqr. 
+pub type EVComp = lapack64.EVComp + +// EVJob specifies whether eigenvectors are computed in Dsyev. +pub type EVJob = lapack64.EVJob + +// LeftEVJob specifies whether left eigenvectors are computed in Dgeev. +pub type LeftEVJob = lapack64.LeftEVJob + +// RightEVJob specifies whether right eigenvectors are computed in Dgeev. +pub type RightEVJob = lapack64.RightEVJob + +// BalanceJob specifies matrix balancing operation. +pub type BalanceJob = lapack64.BalanceJob + +// SchurJob specifies whether the Schur form is computed in Dhseqr. +pub type SchurJob = lapack64.SchurJob + +// SchurComp specifies whether and how the Schur vectors are computed in Dhseqr. +pub type SchurComp = lapack64.SchurComp + +// UpdateSchurComp specifies whether the matrix of Schur vectors is updated in Dtrexc. +pub type UpdateSchurComp = lapack64.UpdateSchurComp + +// EVSide specifies what eigenvectors are computed in Dtrevc3. +pub type EVSide = lapack64.EVSide + +// EVHowMany specifies which eigenvectors are computed in Dtrevc3 and how. +pub type EVHowMany = lapack64.EVHowMany + +// MaximizeNormXJob specifies the heuristic method for computing a contribution to +// the reciprocal Dif-estimate in Dlatdf. +pub type MaximizeNormXJob = lapack64.MaximizeNormXJob + +// OrthoComp specifies whether and how the orthogonal matrix is computed in Dgghrd. +pub type OrthoComp = lapack64.OrthoComp diff --git a/lapack/lapack64/conversions.v b/lapack/lapack64/conversions.v new file mode 100644 index 000000000..5052633f9 --- /dev/null +++ b/lapack/lapack64/conversions.v @@ -0,0 +1,199 @@ +module lapack64 + +// Direct specifies the direction of the multiplication for the Householder matrix. +pub enum Direct as u8 { + // Reflectors are right-multiplied, H_0 * H_1 * ... * H_{k-1}. + forward = u8(`F`) + // Reflectors are left-multiplied, H_{k-1} * ... * H_1 * H_0. + backward = u8(`B`) +} + +// Sort is the sorting order. 
+pub enum Sort as u8 { + sort_increasing = u8(`I`) + sort_decreasing = u8(`D`) +} + +// StoreV indicates the storage direction of elementary reflectors. +pub enum StoreV as u8 { + // Reflector stored in a column of the matrix. + column_wise = u8(`C`) + // Reflector stored in a row of the matrix. + row_wise = u8(`R`) +} + +// MatrixNorm represents the kind of matrix norm to compute. +pub enum MatrixNorm as u8 { + // max(abs(A(i,j))) + max_abs = u8(`M`) + // Maximum absolute column sum (one norm) + max_column_sum = u8(`O`) + // Maximum absolute row sum (infinity norm) + max_row_sum = u8(`I`) + // Frobenius norm (sqrt of sum of squares) + frobenius = u8(`F`) +} + +// MatrixType represents the kind of matrix represented in the data. +pub enum MatrixType as u8 { + // A general dense matrix. + general = u8(`G`) + // An upper triangular matrix. + upper_tri = u8(`U`) + // A lower triangular matrix. + lower_tri = u8(`L`) +} + +// Pivot specifies the pivot type for plane rotations. +pub enum Pivot as u8 { + variable = u8(`V`) + top = u8(`T`) + bottom = u8(`B`) +} + +// ApplyOrtho specifies which orthogonal matrix is applied in Dormbr. +pub enum ApplyOrtho as u8 { + // Apply P or Pᵀ. + apply_p = u8(`P`) + // Apply Q or Qᵀ. + apply_q = u8(`Q`) +} + +// GenOrtho specifies which orthogonal matrix is generated in Dorgbr. +pub enum GenOrtho as u8 { + // Generate Pᵀ. + generate_pt = u8(`P`) + // Generate Q. + generate_q = u8(`Q`) +} + +// SVDJob specifies the singular vector computation type for SVD. +pub enum SVDJob as u8 { + // Compute all columns of the orthogonal matrix U or V. + svd_all = u8(`A`) + // Compute the singular vectors and store them in the orthogonal matrix U or V. + svd_store = u8(`S`) + // Compute the singular vectors and overwrite them on the input matrix A. + svd_overwrite = u8(`O`) + // Do not compute singular vectors. + svd_none = u8(`N`) +} + +// GSVDJob specifies the singular vector computation type for Generalized SVD. 
+pub enum GSVDJob as u8 { + // Compute orthogonal matrix U. + gsvd_u = u8(`U`) + // Compute orthogonal matrix V. + gsvd_v = u8(`V`) + // Compute orthogonal matrix Q. + gsvd_q = u8(`Q`) + // Use unit-initialized matrix. + gsvd_unit = u8(`I`) + // Do not compute orthogonal matrix. + gsvd_none = u8(`N`) +} + +// EVComp specifies how eigenvectors are computed in Dsteqr. +pub enum EVComp as u8 { + // Compute eigenvectors of the original symmetric matrix. + ev_orig = u8(`V`) + // Compute eigenvectors of the tridiagonal matrix. + ev_tridiag = u8(`I`) + // Do not compute eigenvectors. + ev_comp_none = u8(`N`) +} + +// EVJob specifies whether eigenvectors are computed in Dsyev. +pub enum EVJob as u8 { + // Compute eigenvectors. + ev_compute = u8(`V`) + // Do not compute eigenvectors. + ev_none = u8(`N`) +} + +// LeftEVJob specifies whether left eigenvectors are computed in Dgeev. +pub enum LeftEVJob as u8 { + // Compute left eigenvectors. + left_ev_compute = u8(`V`) + // Do not compute left eigenvectors. + left_ev_none = u8(`N`) +} + +// RightEVJob specifies whether right eigenvectors are computed in Dgeev. +pub enum RightEVJob as u8 { + // Compute right eigenvectors. + right_ev_compute = u8(`V`) + // Do not compute right eigenvectors. + right_ev_none = u8(`N`) +} + +// BalanceJob specifies matrix balancing operation. +pub enum BalanceJob as u8 { + permute = u8(`P`) + scale = u8(`S`) + permute_scale = u8(`B`) + balance_none = u8(`N`) +} + +// SchurJob specifies whether the Schur form is computed in Dhseqr. +pub enum SchurJob as u8 { + eigenvalues_only = u8(`E`) + eigenvalues_and_schur = u8(`S`) +} + +// SchurComp specifies whether and how the Schur vectors are computed in Dhseqr. +pub enum SchurComp as u8 { + // Compute Schur vectors of the original matrix. + schur_orig = u8(`V`) + // Compute Schur vectors of the upper Hessenberg matrix. + schur_hess = u8(`I`) + // Do not compute Schur vectors. 
+ schur_none = u8(`N`) +} + +// UpdateSchurComp specifies whether the matrix of Schur vectors is updated in Dtrexc. +pub enum UpdateSchurComp as u8 { + // Update the matrix of Schur vectors. + update_schur = u8(`V`) + // Do not update the matrix of Schur vectors. + update_schur_none = u8(`N`) +} + +// EVSide specifies what eigenvectors are computed in Dtrevc3. +pub enum EVSide as u8 { + // Compute only right eigenvectors. + ev_right = u8(`R`) + // Compute only left eigenvectors. + ev_left = u8(`L`) + // Compute both right and left eigenvectors. + ev_both = u8(`B`) +} + +// EVHowMany specifies which eigenvectors are computed in Dtrevc3 and how. +pub enum EVHowMany as u8 { + // Compute all right and/or left eigenvectors. + ev_all = u8(`A`) + // Compute all right and/or left eigenvectors multiplied by an input matrix. + ev_all_mul_q = u8(`B`) + // Compute selected right and/or left eigenvectors. + ev_selected = u8(`S`) +} + +// MaximizeNormXJob specifies the heuristic method for computing a contribution to +// the reciprocal Dif-estimate in Dlatdf. +pub enum MaximizeNormXJob as u8 { + // Solve Z*x=h-f where h is a vector of ±1. + local_look_ahead = 0 + // Compute an approximate null-vector e of Z, normalize e and solve Z*x=±e-f. + normalized_null_vector = 2 +} + +// OrthoComp specifies whether and how the orthogonal matrix is computed in Dgghrd. +pub enum OrthoComp as u8 { + // Do not compute the orthogonal matrix. + ortho_none = u8(`N`) + // The orthogonal matrix is formed explicitly and returned in the argument. + ortho_explicit = u8(`I`) + // The orthogonal matrix is post-multiplied into the matrix stored in the argument on entry. + ortho_postmul = u8(`V`) +} diff --git a/lapack/lapack64/dgebal.v b/lapack/lapack64/dgebal.v new file mode 100644 index 000000000..ee7746a38 --- /dev/null +++ b/lapack/lapack64/dgebal.v @@ -0,0 +1,33 @@ +module lapack64 + +import math +import vsl.blas + +// dgebal balances a general real matrix A. 
+pub fn dgebal(job BalanceJob, n int, mut a []f64, lda int, scale []f64) int { + if n == 0 { + return 0 + } + + mut info := 0 + if job != .balance_none && job != .permute && job != .scale && job != .permute_scale { + info = -1 + } else if n < 0 { + info = -2 + } else if lda < math.max(1, n) { + info = -4 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dgebal(job, n, a, lda, scale) + return info +} diff --git a/lapack/lapack64/dgeev.v b/lapack/lapack64/dgeev.v new file mode 100644 index 000000000..38799070c --- /dev/null +++ b/lapack/lapack64/dgeev.v @@ -0,0 +1,39 @@ +module lapack64 + +import math +import vsl.blas + +// dgeev computes the eigenvalues and, optionally, the left and/or right eigenvectors for a real nonsymmetric matrix A. +pub fn dgeev(jobvl LeftEVJob, jobvr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl int, mut vr []f64, ldvr int) int { + if n == 0 { + return 0 + } + + mut info := 0 + if jobvl != .left_ev_none && jobvl != .left_ev_compute { + info = -1 + } else if jobvr != .left_ev_none && jobvr != .left_ev_compute { + info = -2 + } else if n < 0 { + info = -3 + } else if lda < math.max(1, n) { + info = -5 + } else if ldvl < 1 || (jobvl == .left_ev_compute && ldvl < n) { + info = -8 + } else if ldvr < 1 || (jobvr == .left_ev_compute && ldvr < n) { + info = -10 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dgehrd(n, ilo, ihi, a, lda, tau, work, lwork) + return info +} diff --git a/lapack/lapack64/dgehrd.v b/lapack/lapack64/dgehrd.v new file mode 100644 index 000000000..2823c0c4d --- /dev/null +++ b/lapack/lapack64/dgehrd.v @@ -0,0 +1,35 @@ +module lapack64 + +import math +import vsl.blas + +// dgehrd reduces a general real matrix A to upper Hessenberg form H by 
an orthogonal similarity transformation. +pub fn dgehrd(n int, ilo int, ihi int, mut a []f64, lda int, tau []f64) int { + if n == 0 { + return 0 + } + + mut info := 0 + if n < 0 { + info = -1 + } else if ilo < 1 || ilo > math.max(1, n) { + info = -2 + } else if ihi < math.min(ilo, n) || ihi > n { + info = -3 + } else if lda < math.max(1, n) { + info = -5 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dgehrd(n, ilo, ihi, a, lda, tau, work, lwork) + return info +} diff --git a/lapack/lapack64/dgesv.v b/lapack/lapack64/dgesv.v index d0d7f90a5..94f948503 100644 --- a/lapack/lapack64/dgesv.v +++ b/lapack/lapack64/dgesv.v @@ -22,7 +22,7 @@ import vsl.blas // The factored form of A is then used to solve the system of equations A * X = // B. On entry, b contains the right hand side matrix B. On return, if ok is // true, b contains the solution matrix X. -pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { +pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, ldb int) { if n < 0 { panic(n_lt0) } @@ -51,6 +51,6 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb panic(short_b) } - dgetrf(n, n, mut a, lda, ipiv) + dgetrf(n, n, mut a, lda, mut ipiv) dgetrs(.no_trans, n, nrhs, mut a, lda, ipiv, mut b, ldb) } diff --git a/lapack/lapack64/dgesvd.v b/lapack/lapack64/dgesvd.v new file mode 100644 index 000000000..a8f035e7d --- /dev/null +++ b/lapack/lapack64/dgesvd.v @@ -0,0 +1,42 @@ +module lapack64 + +import math +import vsl.blas + +// dgesvd computes the singular value decomposition (SVD) of a real matrix A. 
+pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s []f64, mut u []f64, ldu int, mut vt []f64, ldvt int, superb []f64) int { + if m == 0 || n == 0 { + return 0 + } + + mut info := 0 + if jobu != .svd_all && jobu != .svd_store && jobu != .svd_overwrite && jobu != .svd_none { + info = -1 + } else if jobvt != .svd_all && jobvt != .svd_store && jobvt != .svd_overwrite + && jobvt != .svd_none { + info = -2 + } else if m < 0 { + info = -3 + } else if n < 0 { + info = -4 + } else if lda < math.max(1, m) { + info = -6 + } else if ldu < 1 || (jobu == .svd_store && ldu < m) || (jobu == .svd_all && ldu < m) { + info = -9 + } else if ldvt < 1 || (jobvt == .svd_store && ldvt < n) || (jobvt == .svd_all && ldvt < n) { + info = -11 + } + + if info != 0 { + return info + } + + // Quick return if possible + if m == 0 || n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork) + return info +} diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index e1b6553fc..878c7b5b1 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -24,7 +24,7 @@ import vsl.blas // Dgetrf returns whether the matrix A is nonsingular. The LU decomposition will // be computed regardless of the singularity of A, but the result should not be // used to solve a system of equation. -pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { +pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { mn := math.min(m, n) if m < 0 { @@ -34,7 +34,7 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { panic(n_lt0) } if lda < math.max(1, n) { - panic(bad_lda) + panic(bad_ld_a) } // quick return if possible @@ -53,7 +53,8 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { if nb <= 1 || nb >= mn { // use the unblocked algorithm. 
- return dgetf2(m, n, mut a, lda, ipiv) + dgetf2(m, n, mut a, lda, ipiv) + return } for j := 0; j < mn; j += nb { @@ -75,12 +76,12 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { dlaswp(j, mut slice1, lda, j, j + jb, ipiv[..j + jb], 1) mut slice2 := unsafe { a[j * lda + j + jb..] } - blas.dtstrf(.left, .lower, .notrans, .unit, jb, n - j - jb, 1, a[j * lda + j..], + blas.dtstrf(.left, false, false, .unit, jb, n - j - jb, 1, a[j * lda + j..], lda, mut slice2, lda) if j + jb < m { mut slice3 := unsafe { a[(j + jb) * lda + j + jb..] } - blas.dgemm(.notrans, .notrans, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], + blas.dgemm(false, false, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], lda, a[j * lda + j + jb..], lda, 1, mut slice3, lda) } } diff --git a/lapack/lapack64/dgetri.v b/lapack/lapack64/dgetri.v new file mode 100644 index 000000000..8cd14300e --- /dev/null +++ b/lapack/lapack64/dgetri.v @@ -0,0 +1,31 @@ +module lapack64 + +import math +import vsl.blas + +// dgetri computes the inverse of a matrix using the LU factorization computed by dgetrf. +pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) int { + if n == 0 { + return 0 + } + + mut info := 0 + if n < 0 { + info = -1 + } else if lda < math.max(1, n) { + info = -3 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dgetri(n, a, lda, ipiv, work, lwork) + return info +} diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v index 0458cc1e2..bbbc586b0 100644 --- a/lapack/lapack64/dgetrs.v +++ b/lapack/lapack64/dgetrs.v @@ -52,16 +52,15 @@ pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv // Solve A * X = B. dlaswp(nrhs, b, ldb, 0, n - 1, ipiv, 1) // Solve L * X = B, overwriting B with X. 
- blas.dtrsm(.left, .lower, .no_trans, .unit, n, nrhs, 1, mut a, lda, mut b, ldb) + blas.dtrsm(.left, false, false, .unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve U * X = B, overwriting B with X. - blas.dtrsm(.left, .upper, .no_trans, .non_unit, n, nrhs, 1, mut a, lda, mut b, - ldb) + blas.dtrsm(.left, true, false, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) } // Solve Aᵀ * X = B. // Solve Uᵀ * X = B, overwriting B with X. - blas.dtrsm(.left, .upper, .trans, .non_unit, n, nrhs, 1, mut a, lda, mut b, ldb) + blas.dtrsm(.left, true, true, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve Lᵀ * X = B, overwriting B with X. - blas.dtrsm(.left, .lower, .trans, .unit, n, nrhs, 1, mut a, lda, mut b, ldb) + blas.dtrsm(.left, false, true, .unit, n, nrhs, 1, a, lda, mut b, ldb) dlaswp(nrhs, b, ldb, 0, n - 1, ipiv, -1) } diff --git a/lapack/lapack64/dpotrf.v b/lapack/lapack64/dpotrf.v new file mode 100644 index 000000000..b4d307809 --- /dev/null +++ b/lapack/lapack64/dpotrf.v @@ -0,0 +1,33 @@ +module lapack64 + +import math +import vsl.blas + +// dpotrf computes the Cholesky factorization of a real symmetric positive definite matrix A. +pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) int { + if n == 0 { + return 0 + } + + mut info := 0 + if uplo != .upper && uplo != .lower { + info = -1 + } else if n < 0 { + info = -2 + } else if lda < math.max(1, n) { + info = -4 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Placeholder for the actual LAPACK function calls + // Example: info = dpotrf(uplo, n, a, lda, work, lwork) + return info +} diff --git a/lapack/lapack64/dsyev.v b/lapack/lapack64/dsyev.v new file mode 100644 index 000000000..a48998d2a --- /dev/null +++ b/lapack/lapack64/dsyev.v @@ -0,0 +1,38 @@ +module lapack64 + +import math +import vsl.blas + +// dsyev computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. 
+pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, w []f64) int { + if n == 0 { + return 0 + } + + mut info := 0 + if jobz != .ev_none && jobz != .ev_compute { + info = -1 + } else if uplo != .upper && uplo != .lower { + info = -2 + } else if n < 0 { + info = -3 + } else if lda < math.max(1, n) { + info = -5 + } + + if info != 0 { + return info + } + + // Quick return if possible + if n == 0 { + return 0 + } + + // Call the relevant LAPACK functions + // (Here we would call the internal implementations like dsytrd, dorgtr, dormtr, etc.) + + // Placeholder for the actual LAPACK function calls + // Example: info = dsytrd(uplo, n, a, lda, w, work, lwork) + return info +} diff --git a/lapack/lapack64/ilaenv.v b/lapack/lapack64/ilaenv.v index 52d2e4e7d..08151271b 100644 --- a/lapack/lapack64/ilaenv.v +++ b/lapack/lapack64/ilaenv.v @@ -1,5 +1,7 @@ module lapack64 +import math + // ilaenv returns algorithm tuning parameters for the algorithm given by the // input string. ispec specifies the parameter to return: // @@ -242,7 +244,7 @@ fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) i // Used by xGELSS and xGESVD // Assuming n1 and n2 are defined elsewhere in your code // Replace `min(n1, n2)` with actual min calculation or function - return int(f64(min(n1, n2)) * 1.6) + return int(f64(math.min(n1, n2)) * 1.6) } 7 { // Not used diff --git a/lapack/lapack_d_vsl_lapack_common.v b/lapack/lapack_d_vsl_lapack_lapacke.v similarity index 78% rename from lapack/lapack_d_vsl_lapack_common.v rename to lapack/lapack_d_vsl_lapack_lapacke.v index 0c4064c2b..cbc7d9839 100644 --- a/lapack/lapack_d_vsl_lapack_common.v +++ b/lapack/lapack_d_vsl_lapack_lapacke.v @@ -5,19 +5,19 @@ import vsl.blas fn C.LAPACKE_dgesv(matrix_layout blas.MemoryLayout, n int, nrhs int, a &f64, lda int, ipiv &int, b &f64, ldb int) int -fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu 
int, vt &f64, ldvt int, superb &f64) int +fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu SVDJob, jobvt SVDJob, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int fn C.LAPACKE_dgetrf(matrix_layout blas.MemoryLayout, m int, n int, a &f64, lda int, ipiv &int) int fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipiv &int) int -fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, up u32, n int, a &f64, lda int) int +fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, uplo blas.Uplo, n int, a &f64, lda int) int -fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int +fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl LeftEVJob, calc_vr LeftEVJob, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int -fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int +fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz EVJob, uplo blas.Uplo, n int, a &f64, lda int, w &f64, work &f64, lwork int) int -fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int +fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job BalanceJob, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int fn C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int @@ -74,9 +74,9 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb // Note that the routine returns V**T, not V. 
// // NOTE: matrix 'a' will be modified -pub fn dgesvd(jobu &char, jobvt &char, m int, n int, a []f64, lda int, s []f64, u []f64, ldu int, vt []f64, ldvt int, superb []f64) { - info := C.LAPACKE_dgesvd(.row_major, jobu, jobvt, m, n, &a[0], lda, &s[0], &u[0], - ldu, &vt[0], ldvt, &superb[0]) +pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s []f64, mut u []f64, ldu int, mut vt []f64, ldvt int, superb []f64) { + info := C.LAPACKE_dgesvd(.row_major, jobu, jobvt, m, n, unsafe { &a[0] }, lda, &s[0], + unsafe { &u[0] }, ldu, unsafe { &vt[0] }, ldvt, &superb[0]) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } @@ -141,9 +141,9 @@ pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { // where U is an upper triangular matrix and L is lower triangular. // // This is the block version of the algorithm, calling Level 3 BLAS. -pub fn dpotrf(up bool, n int, mut a []f64, lda int) { +pub fn dpotrf(uplo bool, n int, mut a []f64, lda int) { unsafe { - info := C.LAPACKE_dpotrf(.row_major, blas.l_uplo(up), n, &a[0], lda) + info := C.LAPACKE_dpotrf(.row_major, blas.c_uplo(uplo), n, &a[0], lda) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } @@ -173,24 +173,24 @@ pub fn dpotrf(up bool, n int, mut a []f64, lda int) { // // The computed eigenvectors are normalized to have Euclidean norm // equal to 1 and largest component real. 
-pub fn dgeev(calc_vl bool, calc_vr bool, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { +pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { mut vvl := 0.0 mut vvr := 0.0 mut ldvl := ldvl_ mut ldvr := ldvr_ - if calc_vl { + if calc_vl == .left_ev_compute { vvl = vl[0] } else { ldvl = 1 } - if calc_vr { + if calc_vr == .left_ev_compute { vvr = vr[0] } else { ldvr = 1 } unsafe { - info := C.LAPACKE_dgeev(.row_major, &char(blas.job_vlr(calc_vl).str().str), &char(blas.job_vlr(calc_vr).str().str), - n, &a[0], lda, &wr[0], &wi[0], &vvl, ldvl, &vvr, ldvr) + info := C.LAPACKE_dgeev(.row_major, calc_vl, calc_vr, n, &a[0], lda, &wr[0], &wi[0], + &vvl, ldvl, &vvr, ldvr) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } diff --git a/lapack/lapack_notd_vsl_lapack_common.v b/lapack/lapack_notd_vsl_lapack_lapacke.v similarity index 70% rename from lapack/lapack_notd_vsl_lapack_common.v rename to lapack/lapack_notd_vsl_lapack_lapacke.v index 3635ed6cd..39d2cbdd6 100644 --- a/lapack/lapack_notd_vsl_lapack_common.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -4,20 +4,6 @@ import vsl.errors import vsl.blas import vsl.lapack.lapack64 -fn C.LAPACKE_dgesvd(matrix_layout blas.MemoryLayout, jobu &char, jobvt &char, m int, n int, a &f64, lda int, s &f64, u &f64, ldu int, vt &f64, ldvt int, superb &f64) int - -fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipiv &int) int - -fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, up u32, n int, a &f64, lda int) int - -fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl &char, calc_vr &char, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int - -fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz byte, uplo byte, n int, a &f64, lda int, w &f64, work &f64, lwork int) int - -fn C.LAPACKE_dgebal(matrix_layout 
blas.MemoryLayout, job &char, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int - -fn C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a &f64, lda int, tau &f64, work &f64, lwork int) int - // dgesv computes the solution to a real system of linear equations. // // See: http://www.netlib.org/lapack/explore-html/d8/d72/dgesv_8f.html @@ -65,9 +51,9 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb // Note that the routine returns V**T, not V. // // NOTE: matrix 'a' will be modified -pub fn dgesvd(jobu &char, jobvt &char, m int, n int, a []f64, lda int, s []f64, u []f64, ldu int, vt []f64, ldvt int, superb []f64) { - info := C.LAPACKE_dgesvd(.row_major, jobu, jobvt, m, n, &a[0], lda, &s[0], &u[0], - ldu, &vt[0], ldvt, &superb[0]) +pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s []f64, mut u []f64, ldu int, mut vt []f64, ldvt int, superb []f64) { + info := lapack64.dgesvd(jobu, jobvt, m, n, mut a, lda, s, mut u, ldu, mut vt, ldvt, + superb) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } @@ -102,11 +88,9 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { // This method inverts U and then computes inv(A) by solving the system // inv(A)*L = inv(U) for inv(A). pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { - unsafe { - info := C.LAPACKE_dgetri(.row_major, n, &a[0], lda, &ipiv[0]) - if info != 0 { - errors.vsl_panic('lapack failed', .efailed) - } + info := lapack64.dgetri(n, mut a, lda, ipiv) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) } } @@ -128,11 +112,9 @@ pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { // // This is the block version of the algorithm, calling Level 3 BLAS. 
pub fn dpotrf(up bool, n int, mut a []f64, lda int) { - unsafe { - info := C.LAPACKE_dpotrf(.row_major, blas.l_uplo(up), n, &a[0], lda) - if info != 0 { - errors.vsl_panic('lapack failed', .efailed) - } + info := lapack64.dpotrf(blas.c_uplo(up), n, mut a, lda) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) } } @@ -159,26 +141,28 @@ pub fn dpotrf(up bool, n int, mut a []f64, lda int) { // // The computed eigenvectors are normalized to have Euclidean norm // equal to 1 and largest component real. -pub fn dgeev(calc_vl bool, calc_vr bool, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { +pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl_ int, mut vr []f64, ldvr_ int) { mut vvl := 0.0 mut vvr := 0.0 mut ldvl := ldvl_ mut ldvr := ldvr_ - if calc_vl { + if calc_vl == .left_ev_compute { vvl = vl[0] } else { ldvl = 1 } - if calc_vr { + if calc_vr == .left_ev_compute { vvr = vr[0] } else { ldvr = 1 } - unsafe { - info := C.LAPACKE_dgeev(.row_major, &char(blas.job_vlr(calc_vl).str().str), &char(blas.job_vlr(calc_vr).str().str), - n, &a[0], lda, &wr[0], &wi[0], &vvl, ldvl, &vvr, ldvr) - if info != 0 { - errors.vsl_panic('lapack failed', .efailed) - } + + vl[0] = vvl + vr[0] = vvr + + info := lapack64.dgeev(calc_vl, calc_vr, n, mut a, lda, wr, wi, mut vl, ldvl, mut + vr, ldvr) + if info != 0 { + errors.vsl_panic('lapack failed', .efailed) } } From bfa69075e9101c1f9408261853728812697004e1 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 01:44:33 -0300 Subject: [PATCH 16/33] refactor: Replace constant lookup with a list of constants in ilaenv.v --- lapack/lapack64/ilaenv.v | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lapack/lapack64/ilaenv.v b/lapack/lapack64/ilaenv.v index 08151271b..f9661e1c2 100644 --- a/lapack/lapack64/ilaenv.v +++ b/lapack/lapack64/ilaenv.v @@ -22,7 +22,7 @@ import math // // ilaenv is an 
internal routine. It is exported for testing purposes. fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) int { - // TODO(btracey): Replace this with a constant lookup? A list of constants? + // TODO(ulises-jeremias): Replace this with a constant lookup? A list of constants? sname := name[0] == `S` || name[0] == `D` cname := name[0] == `C` || name[0] == `Z` if !sname && !cname { From 961475ec4d03f84667151e6a9b73c01400690501 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:09:56 -0300 Subject: [PATCH 17/33] refactor: Update create_image_2d function to use local variable for format --- la/densesol.v | 4 +- la/matrix_ops.v | 10 +-- lapack/lapack64/dgesv.v | 2 +- lapack/lapack64/dgetf2.v | 66 ++++++++++++++++++++ lapack/lapack64/dgetrf.v | 23 ++++--- lapack/lapack64/dgetri.v | 4 +- lapack/lapack64/dgetrs.v | 6 +- lapack/lapack64/dlaswp.v | 44 +++++++++++++ lapack/lapack64/iparmq.v | 83 +++++++++++++++++++++++++ lapack/lapack_d_vsl_lapack_lapacke.v | 8 +-- lapack/lapack_notd_vsl_lapack_lapacke.v | 12 ++-- 11 files changed, 229 insertions(+), 33 deletions(-) create mode 100644 lapack/lapack64/dgetf2.v create mode 100644 lapack/lapack64/dlaswp.v create mode 100644 lapack/lapack64/iparmq.v diff --git a/la/densesol.v b/la/densesol.v index 10395a902..3deed74da 100644 --- a/la/densesol.v +++ b/la/densesol.v @@ -15,6 +15,6 @@ pub fn den_solve(mut x []f64, a &Matrix[f64], b []f64, preserve_a bool) { for i in 0 .. 
x.len { x[i] = b[i] } - ipiv := []int{len: a_.m} - lapack.dgesv(a_.m, 1, mut a_.data, a_.m, ipiv, mut x, 1) + mut ipiv := []int{len: a_.m} + lapack.dgesv(a_.m, 1, mut a_.data, a_.m, mut ipiv, mut x, 1) } diff --git a/la/matrix_ops.v b/la/matrix_ops.v index a792aeede..cbd6cb515 100644 --- a/la/matrix_ops.v +++ b/la/matrix_ops.v @@ -12,8 +12,8 @@ pub fn matrix_det(o &Matrix[f64]) f64 { .efailed) } mut ai := o.data.clone() - ipiv := []int{len: int(math.min(o.m, o.n))} - lapack.dgetrf(o.m, o.n, mut ai, o.m, ipiv) // NOTE: ipiv are 1-based indices + mut ipiv := []int{len: int(math.min(o.m, o.n))} + lapack.dgetrf(o.m, o.n, mut ai, o.m, mut ipiv) // NOTE: ipiv are 1-based indices mut det := 1.0 for i in 0 .. o.m { if ipiv[i] - 1 == i { // NOTE: ipiv are 1-based indices @@ -107,8 +107,8 @@ pub fn matrix_inv(mut ai Matrix[f64], mut a Matrix[f64], calc_det bool) f64 { // square inverse if a.m == a.n { ai.data = a.data.clone() - ipiv := []int{len: int(math.min(a.m, a.n))} - lapack.dgetrf(a.m, a.n, mut ai.data, a.m, ipiv) // NOTE: ipiv are 1-based indices + mut ipiv := []int{len: int(math.min(a.m, a.n))} + lapack.dgetrf(a.m, a.n, mut ai.data, a.m, mut ipiv) // NOTE: ipiv are 1-based indices if calc_det { det = 1.0 for i := 0; i < a.m; i++ { @@ -119,7 +119,7 @@ pub fn matrix_inv(mut ai Matrix[f64], mut a Matrix[f64], calc_det bool) f64 { } } } - lapack.dgetri(a.n, mut ai.data, a.m, ipiv) + lapack.dgetri(a.n, mut ai.data, a.m, mut ipiv) return det } // singular value decomposition diff --git a/lapack/lapack64/dgesv.v b/lapack/lapack64/dgesv.v index 94f948503..3c6834d5e 100644 --- a/lapack/lapack64/dgesv.v +++ b/lapack/lapack64/dgesv.v @@ -52,5 +52,5 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, } dgetrf(n, n, mut a, lda, mut ipiv) - dgetrs(.no_trans, n, nrhs, mut a, lda, ipiv, mut b, ldb) + dgetrs(.no_trans, n, nrhs, mut a, lda, mut ipiv, mut b, ldb) } diff --git a/lapack/lapack64/dgetf2.v b/lapack/lapack64/dgetf2.v new file mode 100644 
index 000000000..2748c78d7 --- /dev/null +++ b/lapack/lapack64/dgetf2.v @@ -0,0 +1,66 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dgetf2(m int, n int, mut a []f64, lda int, mut ipiv []int) { + mn := math.min(m, n) + if m < 0 { + panic(m_lt0) + } else if n < 0 { + panic(n_lt0) + } else if lda < math.max(1, n) { + panic(bad_ld_a) + } + + // Quick return if possible. + if mn == 0 { + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } else if ipiv.len != mn { + panic(bad_len_ipiv) + } + + sfmin := dlamch_s() + + for j := 0; j < mn; j++ { + // Find a pivot and test for singularity. + jp := j + blas.idamax(m - j, a[j * lda + j..], lda) + ipiv[j] = jp + if a[jp * lda + j] == 0.0 { + panic('lapack: matrix is singular') + } else { + // Swap the rows if necessary. + if jp != j { + mut slice1 := unsafe { a[j * lda..] } + mut slice2 := unsafe { a[jp * lda..] } + blas.dswap(n, mut slice1, 1, mut slice2, 1) + } + if j < m - 1 { + aj := a[j * lda + j] + if math.abs(aj) >= sfmin { + mut slice3 := unsafe { a[(j + 1) * lda + j..] } + blas.dscal(m - j - 1, 1.0 / aj, mut slice3, lda) + } else { + for i := 0; i < m - j - 1; i++ { + a[(j + 1) * lda + j] /= aj + } + } + } + } + if j < mn - 1 { + mut slice4 := unsafe { a[(j + 1) * lda + j + 1..] } + blas.dger(m - j - 1, n - j - 1, -1.0, a[(j + 1) * lda + j..], lda, a[j * lda + j + 1..], + 1, mut slice4, lda) + } + } +} + +fn dlamch_s() f64 { + // Returns the safe minimum value (sfmin). + // This value is used as a threshold for detecting small values in the matrix. + return math.ldexp(1.0, -1022) // Smallest positive normal number. +} diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 878c7b5b1..5a1dc8d29 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -53,7 +53,7 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { if nb <= 1 || nb >= mn { // use the unblocked algorithm. 
- dgetf2(m, n, mut a, lda, ipiv) + dgetf2(m, n, mut a, lda, mut ipiv) return } @@ -61,28 +61,31 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { jb := math.min(mn - j, nb) // factor diagonal and subdiagonal blocks and test for exact singularity. - dgetf2(m - j, jb, mut a[j * lda + j..], lda, ipiv[j..j + jb]) + mut slice1 := unsafe { ipiv[j..j + jb] } + dgetf2(m - j, jb, mut a[j * lda + j..], lda, mut slice1) for i := j; i <= math.min(m - 1, j + jb - 1); i++ { ipiv[i] += j } // apply interchanges to columns 1..j-1. - dlaswp(j, mut a, lda, j, j + jb - 1, ipiv[..j + jb], 1) + mut slice_ipiv1 := unsafe { ipiv[..j + jb] } + dlaswp(j, mut a, lda, j, j + jb - 1, mut slice_ipiv1, 1) if j + jb < n { // apply interchanges to columns 1..j-1. - mut slice1 := unsafe { a[j + jb..] } - dlaswp(j, mut slice1, lda, j, j + jb, ipiv[..j + jb], 1) + mut slice2 := unsafe { a[j + jb..] } + mut slice_ipiv2 := unsafe { ipiv[..j + jb] } + dlaswp(j, mut slice2, lda, j, j + jb, mut slice_ipiv2, 1) - mut slice2 := unsafe { a[j * lda + j + jb..] } - blas.dtstrf(.left, false, false, .unit, jb, n - j - jb, 1, a[j * lda + j..], - lda, mut slice2, lda) + mut slice3 := unsafe { a[j * lda + j + jb..] } + blas.dtrsm(.left, false, false, .unit, jb, n - j - jb, 1, a[j * lda + j..], + lda, mut slice3, lda) if j + jb < m { - mut slice3 := unsafe { a[(j + jb) * lda + j + jb..] } + mut slice4 := unsafe { a[(j + jb) * lda + j + jb..] } blas.dgemm(false, false, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], - lda, a[j * lda + j + jb..], lda, 1, mut slice3, lda) + lda, a[j * lda + j + jb..], lda, 1, mut slice4, lda) } } } diff --git a/lapack/lapack64/dgetri.v b/lapack/lapack64/dgetri.v index 8cd14300e..0b466ad6a 100644 --- a/lapack/lapack64/dgetri.v +++ b/lapack/lapack64/dgetri.v @@ -4,7 +4,7 @@ import math import vsl.blas // dgetri computes the inverse of a matrix using the LU factorization computed by dgetrf. 
-pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) int { +pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) int { if n == 0 { return 0 } @@ -26,6 +26,6 @@ pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) int { } // Placeholder for the actual LAPACK function calls - // Example: info = dgetri(n, a, lda, ipiv, work, lwork) + // Example: info = dgetri(n, a, lda, mut ipiv, work, lwork) return info } diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v index bbbc586b0..9e1424600 100644 --- a/lapack/lapack64/dgetrs.v +++ b/lapack/lapack64/dgetrs.v @@ -16,7 +16,7 @@ import vsl.blas // // a and ipiv contain the LU factorization of A and the permutation indices as // computed by Dgetrf. ipiv is zero-indexed. -pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { +pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, ldb int) { if trans != .no_trans && trans != .trans && trans != .conj_trans { panic(bad_trans) } @@ -50,7 +50,7 @@ pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv if trans != .no_trans { // Solve A * X = B. - dlaswp(nrhs, b, ldb, 0, n - 1, ipiv, 1) + dlaswp(nrhs, mut b, ldb, 0, n - 1, mut ipiv, 1) // Solve L * X = B, overwriting B with X. blas.dtrsm(.left, false, false, .unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve U * X = B, overwriting B with X. @@ -62,5 +62,5 @@ pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, ipiv blas.dtrsm(.left, true, true, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve Lᵀ * X = B, overwriting B with X. 
blas.dtrsm(.left, false, true, .unit, n, nrhs, 1, a, lda, mut b, ldb) - dlaswp(nrhs, b, ldb, 0, n - 1, ipiv, -1) + dlaswp(nrhs, mut b, ldb, 0, n - 1, mut ipiv, -1) } diff --git a/lapack/lapack64/dlaswp.v b/lapack/lapack64/dlaswp.v new file mode 100644 index 000000000..5f6a53c8d --- /dev/null +++ b/lapack/lapack64/dlaswp.v @@ -0,0 +1,44 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dlaswp(n int, mut a []f64, lda int, k1 int, k2 int, mut ipiv []int, incx int) { + if n < 0 { + panic(n_lt0) + } else if k1 < 0 { + panic(bad_k1) + } else if k2 < k1 { + panic(bad_k2) + } else if lda < math.max(1, n) { + panic(bad_ld_a) + } else if a.len < k2 * lda + n { + // A must have at least k2+1 rows. + panic(short_a) + } else if ipiv.len != k2 + 1 { + panic(bad_len_ipiv) + } else if incx != 1 && incx != -1 { + panic(abs_inc_not_one) + } + + if n == 0 { + return + } + + if incx == 1 { + for k := k1; k <= k2; k++ { + if k == ipiv[k] { + continue + } + blas.dswap(n, mut a[k * lda..], 1, mut a[ipiv[k] * lda..], 1) + } + return + } + + for k := k2; k >= k1; k-- { + if k == ipiv[k] { + continue + } + blas.dswap(n, mut a[k * lda..], 1, mut a[ipiv[k] * lda..], 1) + } +} diff --git a/lapack/lapack64/iparmq.v b/lapack/lapack64/iparmq.v new file mode 100644 index 000000000..5917d7109 --- /dev/null +++ b/lapack/lapack64/iparmq.v @@ -0,0 +1,83 @@ +module lapack64 + +import math + +fn iparmq(ispec int, name string, opts string, n int, ilo int, ihi int, lwork int) int { + nh := ihi - ilo + 1 + mut ns := 2 + if nh >= 30 { + ns = 4 + } else if nh >= 60 { + ns = 10 + } else if nh >= 150 { + ns = math.max(10, nh / int(math.log(nh) / math.ln2)) + } else if nh >= 590 { + ns = 64 + } else if nh >= 3000 { + ns = 128 + } else if nh >= 6000 { + ns = 256 + } + ns = math.max(2, ns - (ns % 2)) + + match ispec { + 12 { + // Matrices of order smaller than nmin get sent to Dlahqr, the + // classic double shift algorithm. This must be at least 11. 
+ nmin := 75 + return nmin + } + 13 { + knwswp := 500 + if nh <= knwswp { + return ns + } + return 3 * ns / 2 + } + 14 { + // Skip a computationally expensive multi-shift QR sweep with + // Dlaqr5 whenever aggressive early deflation finds at least + // nibble*(window size)/100 deflations. The default, small, + // value reflects the expectation that the cost of looking + // through the deflation window with Dlaqr3 will be + // substantially smaller. + nibble := 14 + return nibble + } + 15 { + return ns + } + 16 { + if name.len != 6 { + panic('bad name length') + } + k22min := 14 + kacmin := 14 + mut acc22 := 0 + if name[1..].starts_with('GGHRD') || name[1..].starts_with('GGHD3') { + acc22 = 1 + if nh >= k22min { + acc22 = 2 + } + } else if name[3..].starts_with('EXC') { + if nh >= kacmin { + acc22 = 1 + } + if nh >= k22min { + acc22 = 2 + } + } else if name[1..].starts_with('HSEQR') || name[1..5].starts_with('LAQR') { + if ns >= kacmin { + acc22 = 1 + } + if ns >= k22min { + acc22 = 2 + } + } + return acc22 + } + else { + panic('bad ispec') + } + } +} diff --git a/lapack/lapack_d_vsl_lapack_lapacke.v b/lapack/lapack_d_vsl_lapack_lapacke.v index cbc7d9839..9bc8314d9 100644 --- a/lapack/lapack_d_vsl_lapack_lapacke.v +++ b/lapack/lapack_d_vsl_lapack_lapacke.v @@ -43,11 +43,11 @@ fn C.LAPACKE_dgehrd(matrix_layout blas.MemoryLayout, n int, ilo int, ihi int, a // system of equations A * X = B. // // NOTE: matrix 'a' will be modified -pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { +pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, ldb int) { if ipiv.len != n { errors.vsl_panic('ipiv.len must be equal to n. 
${ipiv.len} != ${n}\n', .efailed) } - info := C.LAPACKE_dgesv(.row_major, n, nrhs, unsafe { &a[0] }, lda, &ipiv[0], unsafe { &b[0] }, + info := C.LAPACKE_dgesv(.row_major, n, nrhs, unsafe { &a[0] }, lda, unsafe { &ipiv[0] }, unsafe { &b[0] }, ldb) if info != 0 { errors.vsl_panic('lapack failed', .efailed) @@ -98,7 +98,7 @@ pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s [ // // NOTE: (1) matrix 'a' will be modified // (2) ipiv indices are 1-based (i.e. Fortran) -pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { +pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { unsafe { info := C.LAPACKE_dgetrf(.row_major, m, n, &a[0], lda, &ipiv[0]) if info != 0 { @@ -115,7 +115,7 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { // // This method inverts U and then computes inv(A) by solving the system // inv(A)*L = inv(U) for inv(A). -pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { +pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { unsafe { info := C.LAPACKE_dgetri(.row_major, n, &a[0], lda, &ipiv[0]) if info != 0 { diff --git a/lapack/lapack_notd_vsl_lapack_lapacke.v b/lapack/lapack_notd_vsl_lapack_lapacke.v index 39d2cbdd6..2730f4ef6 100644 --- a/lapack/lapack_notd_vsl_lapack_lapacke.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -27,8 +27,8 @@ import vsl.lapack.lapack64 // // NOTE: matrix 'a' will be modified @[inline] -pub fn dgesv(n int, nrhs int, mut a []f64, lda int, ipiv []int, mut b []f64, ldb int) { - lapack64.dgesv(n, nrhs, mut a, lda, ipiv, mut b, ldb) +pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, ldb int) { + lapack64.dgesv(n, nrhs, mut a, lda, mut ipiv, mut b, ldb) } // dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. 
@@ -75,8 +75,8 @@ pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s [ // // NOTE: (1) matrix 'a' will be modified // (2) ipiv indices are 1-based (i.e. Fortran) -pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { - lapack64.dgetrf(m, n, mut a, lda, ipiv) +pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { + lapack64.dgetrf(m, n, mut a, lda, mut ipiv) } // dgetri computes the inverse of a matrix using the LU factorization computed by DGETRF. @@ -87,8 +87,8 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, ipiv []int) { // // This method inverts U and then computes inv(A) by solving the system // inv(A)*L = inv(U) for inv(A). -pub fn dgetri(n int, mut a []f64, lda int, ipiv []int) { - info := lapack64.dgetri(n, mut a, lda, ipiv) +pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { + info := lapack64.dgetri(n, mut a, lda, mut ipiv) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } From f6cb78224ad9bf1301e226a3e785651a059a2f37 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:11:33 -0300 Subject: [PATCH 18/33] refactor: Update execute tests step in ci.yml to use Pure C Backend with LAPACKE --- .github/workflows/ci.yml | 8 ++++---- bin/test | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7b943ecb4..9ba2cf481 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,8 +67,8 @@ jobs: - name: Execute Tests using Pure V Backend run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} - - name: Execute Tests using Pure V Backend with Pure C Blas - run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas + - name: Execute Tests using Pure V Backend with Pure C Backend + run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas --use-lapacke run-tests-on-macos: runs-on: ${{ matrix.os }} @@ -113,5 +113,5 @@ jobs: - name: Execute Tests using Pure V Backend run: 
~/.vmodules/vsl/bin/test - - name: Execute Tests using Pure V Backend with Pure C Blas - run: ~/.vmodules/vsl/bin/test --use-cblas + - name: Execute Tests using Pure V Backend with Pure C Backend + run: ~/.vmodules/vsl/bin/test --use-cblas --use-lapacke diff --git a/bin/test b/bin/test index d4e1878ed..1c171926d 100755 --- a/bin/test +++ b/bin/test @@ -10,6 +10,7 @@ ## --stats Execute with stats ## --prod Execute with prod build ## --use-cblas Execute tests using cblas +## --use-lapacke Execute tests using lapacke ## --use-autofree Execute tests using atofree ## --use-gc=STRATEGY Execute tests using garbage collector ## --skip-examples Skip examples compilation @@ -31,6 +32,11 @@ if [[ -n "${use_cblas}" ]]; then flags="${flags} -d vsl_blas_cblas" fi +if [[ -n "${use_lapacke}" ]]; then + echo "Running tests using LAPACKE" + flags="${flags} -d vsl_lapack_lapacke" +fi + if [[ -n "${use_autofree}" ]]; then echo "Running tests using V Math" flags="${flags} -autofree" From 569a96b9cca7bc33093350d704f15393460548e0 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:17:47 -0300 Subject: [PATCH 19/33] refactor: Comment out test execution step in ci.yml --- .github/workflows/ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ba2cf481..1cdfa15e6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,8 +64,8 @@ jobs: - name: Move VSL source code to V Modules run: mv ./vsl ~/.vmodules - - name: Execute Tests using Pure V Backend - run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} + # - name: Execute Tests using Pure V Backend + # run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} - name: Execute Tests using Pure V Backend with Pure C Backend run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas --use-lapacke @@ -110,8 +110,8 @@ jobs: - name: Move VSL source code to V Modules run: mv ./vsl ~/.vmodules - - name: Execute Tests using Pure V 
Backend - run: ~/.vmodules/vsl/bin/test + # - name: Execute Tests using Pure V Backend + # run: ~/.vmodules/vsl/bin/test - name: Execute Tests using Pure V Backend with Pure C Backend run: ~/.vmodules/vsl/bin/test --use-cblas --use-lapacke From d8a4fc2ac24528926beb1ced1449094d5a46fa62 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:33:20 -0300 Subject: [PATCH 20/33] refactor: Update create_image_2d function to use local variable for format --- blas/blas64/dgemm.v | 21 +- blas/conversions.v | 20 -- blas/oblas_d_vsl_blas_cblas.v | 428 +++++++++++++-------------- blas/oblas_notd_vsl_blas_cblas.v | 25 +- float/float64/gemv_test.v | 10 +- la/blas.v | 34 +-- lapack/lapack64/dgetrf.v | 6 +- lapack/lapack64/dgetrs.v | 8 +- lapack/lapack_d_vsl_lapack_lapacke.v | 8 +- 9 files changed, 261 insertions(+), 299 deletions(-) diff --git a/blas/blas64/dgemm.v b/blas/blas64/dgemm.v index f7bcfeb5e..86ef17acc 100644 --- a/blas/blas64/dgemm.v +++ b/blas/blas64/dgemm.v @@ -99,10 +99,11 @@ pub fn dgemm(trans_a Transpose, trans_b Transpose, m int, n int, k int, alpha f6 } } - dgemm_parallel(a_trans, b_trans, m, n, k, a, lda, b, ldb, mut c, ldc, alpha) + dgemm_parallel(if a_trans { .trans } else { .no_trans }, if b_trans { .trans } else { .no_trans }, + m, n, k, a, lda, b, ldb, mut c, ldc, alpha) } -fn dgemm_parallel(a_trans bool, b_trans bool, m int, n int, k int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64) { +fn dgemm_parallel(a_trans Transpose, b_trans Transpose, m int, n int, k int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64) { // dgemm_parallel computes a parallel matrix multiplication by partitioning // a and b into sub-blocks, and updating c with the multiplication of the sub-block // In all cases, @@ -155,7 +156,7 @@ fn dgemm_parallel(a_trans bool, b_trans bool, m int, n int, k int, a []f64, lda for i := 0; i < m; i += block_size { for j := 0; j < n; j += block_size { // worker_limit <- 0 - go fn (a_trans 
bool, b_trans bool, m int, n int, max_k_len int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64, i int, j int, mut wg sync.WaitGroup) { + go fn (a_trans Transpose, b_trans Transpose, m int, n int, max_k_len int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64, i int, j int, mut wg sync.WaitGroup) { defer { wg.done() // <-worker_limit @@ -180,12 +181,12 @@ fn dgemm_parallel(a_trans bool, b_trans bool, m int, n int, k int, a []f64, lda } mut a_sub := []f64{} mut b_sub := []f64{} - if a_trans { + if a_trans == .trans { a_sub = slice_view_f64(a, lda, k, i, lenk, leni) } else { a_sub = slice_view_f64(a, lda, i, k, leni, lenk) } - if b_trans { + if b_trans == .trans { b_sub = slice_view_f64(b, ldb, j, k, lenj, lenk) } else { b_sub = slice_view_f64(b, ldb, k, j, lenk, lenj) @@ -200,20 +201,20 @@ fn dgemm_parallel(a_trans bool, b_trans bool, m int, n int, k int, a []f64, lda } // dgemm_serial is serial matrix multiply -fn dgemm_serial(a_trans bool, b_trans bool, m int, n int, k int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64) { - if !a_trans && !b_trans { +fn dgemm_serial(a_trans Transpose, b_trans Transpose, m int, n int, k int, a []f64, lda int, b []f64, ldb int, mut c []f64, ldc int, alpha f64) { + if a_trans != .trans && b_trans != .trans { dgemm_serial_not_not(m, n, k, a, lda, b, ldb, mut c, ldc, alpha) return } - if a_trans && !b_trans { + if a_trans == .trans && b_trans != .trans { dgemm_serial_trans_not(m, n, k, a, lda, b, ldb, mut c, ldc, alpha) return } - if !a_trans && b_trans { + if a_trans != .trans && b_trans == .trans { dgemm_serial_not_trans(m, n, k, a, lda, b, ldb, mut c, ldc, alpha) return } - if a_trans && b_trans { + if a_trans == .trans && b_trans == .trans { dgemm_serial_trans_trans(m, n, k, a, lda, b, ldb, mut c, ldc, alpha) return } diff --git a/blas/conversions.v b/blas/conversions.v index 553bcf16d..b421fa60c 100644 --- a/blas/conversions.v +++ b/blas/conversions.v @@ -21,26 +21,6 
@@ pub type Diagonal = blas64.Diagonal // Side is used to specify whether a matrix is on the left or right side in a matrix-matrix multiplication. pub type Side = blas64.Side -// c_trans is a helper function to convert bool to Transpose -pub fn c_trans(trans bool) Transpose { - return if trans { .trans } else { .no_trans } -} - -// c_uplo is a helper function to convert bool to Uplo -pub fn c_uplo(up bool) Uplo { - return if up { .upper } else { .lower } -} - -// l_uplo is a helper function to convert bool to Uplo -pub fn l_uplo(up bool) u8 { - return if up { `U` } else { `L` } -} - -// job_vlr is a helper function to convert bool to char -pub fn job_vlr(do_calc bool) rune { - return if do_calc { `V` } else { `N` } -} - // slice_to_col_major converts nested slice into an array representing a col-major matrix // // _**NOTE**: make sure to have at least 1x1 item_ diff --git a/blas/oblas_d_vsl_blas_cblas.v b/blas/oblas_d_vsl_blas_cblas.v index 777038f3d..653a6c171 100644 --- a/blas/oblas_d_vsl_blas_cblas.v +++ b/blas/oblas_d_vsl_blas_cblas.v @@ -472,25 +472,25 @@ pub fn zdscal(n int, alpha f64, mut x voidptr, incx int) { } @[inline] -pub fn sgemv(trans bool, m int, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_sgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn sgemv(trans Transpose, m int, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sgemv(.row_major, trans, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dgemv(.row_major, c_trans(trans), m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn dgemv(trans Transpose, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + 
C.cblas_dgemv(.row_major, trans, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn cgemv(trans bool, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_cgemv(.row_major, c_trans(trans), m, n, alpha, a, lda, x, incx, beta, y, incy) +pub fn cgemv(trans Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_cgemv(.row_major, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn zgemv(trans bool, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_zgemv(.row_major, c_trans(trans), m, n, alpha, a, lda, x, incx, beta, y, incy) +pub fn zgemv(trans Transpose, m int, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zgemv(.row_major, trans, m, n, alpha, a, lda, x, incx, beta, y, incy) } @[inline] @@ -526,443 +526,427 @@ pub fn zgerc(m int, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy i } @[inline] -pub fn strsv(uplo bool, trans bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn strsv(uplo Uplo, trans Transpose, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strsv(.row_major, uplo, trans, diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn dtrsv(uplo bool, trans bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn dtrsv(uplo Uplo, trans Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrsv(.row_major, uplo, trans, diag, n, unsafe { &a[0] 
}, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctrsv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ctrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +pub fn ctrsv(uplo Uplo, trans Transpose, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctrsv(.row_major, uplo, trans, diag, n, a, lda, x, incx) } @[inline] -pub fn ztrsv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ztrsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +pub fn ztrsv(uplo Uplo, trans Transpose, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztrsv(.row_major, uplo, trans, diag, n, a, lda, x, incx) } @[inline] -pub fn strmv(uplo bool, trans bool, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_strmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn strmv(uplo Uplo, trans Transpose, diag Diagonal, n int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_strmv(.row_major, uplo, trans, diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn dtrmv(uplo bool, trans bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn dtrmv(uplo Uplo, trans Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtrmv(.row_major, uplo, trans, diag, n, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctrmv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ctrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +pub fn ctrmv(uplo Uplo, trans Transpose, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + 
C.cblas_ctrmv(.row_major, uplo, trans, diag, n, a, lda, x, incx) } @[inline] -pub fn ztrmv(uplo bool, trans bool, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ztrmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, a, lda, x, incx) +pub fn ztrmv(uplo Uplo, trans Transpose, diag Diagonal, n int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztrmv(.row_major, uplo, trans, diag, n, a, lda, x, incx) } @[inline] -pub fn ssyr(uplo bool, n int, alpha f32, x []f32, incx int, mut a []f32, lda int) { - C.cblas_ssyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, +pub fn ssyr(uplo Uplo, n int, alpha f32, x []f32, incx int, mut a []f32, lda int) { + C.cblas_ssyr(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, lda) } @[inline] -pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { - C.cblas_dsyr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, +pub fn dsyr(uplo Uplo, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { + C.cblas_dsyr(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &a[0] }, lda) } @[inline] -pub fn cher(uplo bool, n int, alpha f32, x voidptr, incx int, mut a voidptr, lda int) { - C.cblas_cher(.row_major, c_uplo(uplo), n, alpha, x, incx, a, lda) +pub fn cher(uplo Uplo, n int, alpha f32, x voidptr, incx int, mut a voidptr, lda int) { + C.cblas_cher(.row_major, uplo, n, alpha, x, incx, a, lda) } @[inline] -pub fn zher(uplo bool, n int, alpha f64, x voidptr, incx int, mut a voidptr, lda int) { - C.cblas_zher(.row_major, c_uplo(uplo), n, alpha, x, incx, a, lda) +pub fn zher(uplo Uplo, n int, alpha f64, x voidptr, incx int, mut a voidptr, lda int) { + C.cblas_zher(.row_major, uplo, n, alpha, x, incx, a, lda) } @[inline] -pub fn ssyr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { - C.cblas_ssyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, 
incx, unsafe { &y[0] }, +pub fn ssyr2(uplo Uplo, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32, lda int) { + C.cblas_ssyr2(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &a[0] }, lda) } @[inline] -pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - C.cblas_dsyr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, +pub fn dsyr2(uplo Uplo, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { + C.cblas_dsyr2(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &a[0] }, lda) } @[inline] -pub fn cher2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { - C.cblas_cher2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, a, lda) +pub fn cher2(uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_cher2(.row_major, uplo, n, alpha, x, incx, y, incy, a, lda) } @[inline] -pub fn zher2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { - C.cblas_zher2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, a, lda) +pub fn zher2(uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut a voidptr, lda int) { + C.cblas_zher2(.row_major, uplo, n, alpha, x, incx, y, incy, a, lda) } @[inline] -pub fn sgbmv(trans bool, m int, n int, kl int, ku int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_sgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, unsafe { &a[0] }, lda, - unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +pub fn sgbmv(trans Transpose, m int, n int, kl int, ku int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sgbmv(.row_major, trans, m, n, kl, ku, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, 
beta, unsafe { &y[0] }, incy) } @[inline] -pub fn dgbmv(trans bool, m int, n int, kl int, ku int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, unsafe { &a[0] }, lda, - unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) +pub fn dgbmv(trans Transpose, m int, n int, kl int, ku int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dgbmv(.row_major, trans, m, n, kl, ku, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn cgbmv(trans bool, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_cgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, - y, incy) +pub fn cgbmv(trans Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_cgbmv(.row_major, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn zgbmv(trans bool, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_zgbmv(.row_major, c_trans(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, - y, incy) +pub fn zgbmv(trans Transpose, m int, n int, kl int, ku int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zgbmv(.row_major, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn ssbmv(uplo bool, n int, k int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_ssbmv(.row_major, c_uplo(uplo), n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn ssbmv(uplo Uplo, n int, k int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + 
C.cblas_ssbmv(.row_major, uplo, n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn dsbmv(uplo bool, n int, k int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dsbmv(.row_major, c_uplo(uplo), n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn dsbmv(uplo Uplo, n int, k int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dsbmv(.row_major, uplo, n, k, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn stbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_stbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn stbmv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_stbmv(.row_major, uplo, trans, diag, n, k, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn dtbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn dtbmv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtbmv(.row_major, uplo, trans, diag, n, k, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ctbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +pub fn ctbmv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctbmv(.row_major, uplo, trans, diag, n, k, a, lda, x, incx) } @[inline] -pub fn ztbmv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, 
lda int, mut x voidptr, incx int) { - C.cblas_ztbmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +pub fn ztbmv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztbmv(.row_major, uplo, trans, diag, n, k, a, lda, x, incx) } @[inline] -pub fn stbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { - C.cblas_stbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn stbsv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a []f32, lda int, mut x []f32, incx int) { + C.cblas_stbsv(.row_major, uplo, trans, diag, n, k, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn dtbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { - C.cblas_dtbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, unsafe { &a[0] }, - lda, unsafe { &x[0] }, incx) +pub fn dtbsv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { + C.cblas_dtbsv(.row_major, uplo, trans, diag, n, k, unsafe { &a[0] }, lda, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ctbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +pub fn ctbsv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ctbsv(.row_major, uplo, trans, diag, n, k, a, lda, x, incx) } @[inline] -pub fn ztbsv(uplo bool, trans bool, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { - C.cblas_ztbsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, k, a, lda, x, incx) +pub fn ztbsv(uplo Uplo, trans Transpose, diag Diagonal, n int, k int, a voidptr, lda int, mut x voidptr, incx int) { + C.cblas_ztbsv(.row_major, uplo, trans, diag, 
n, k, a, lda, x, incx) } @[inline] -pub fn stpmv(uplo bool, trans bool, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { - C.cblas_stpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, - unsafe { &x[0] }, incx) +pub fn stpmv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { + C.cblas_stpmv(.row_major, uplo, trans, diag, n, unsafe { &ap[0] }, unsafe { &x[0] }, + incx) } @[inline] -pub fn dtpmv(uplo bool, trans bool, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { - C.cblas_dtpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, - unsafe { &x[0] }, incx) +pub fn dtpmv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + C.cblas_dtpmv(.row_major, uplo, trans, diag, n, unsafe { &ap[0] }, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctpmv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { - C.cblas_ctpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +pub fn ctpmv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ctpmv(.row_major, uplo, trans, diag, n, ap, x, incx) } @[inline] -pub fn ztpmv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { - C.cblas_ztpmv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +pub fn ztpmv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ztpmv(.row_major, uplo, trans, diag, n, ap, x, incx) } @[inline] -pub fn stpsv(uplo bool, trans bool, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { - C.cblas_stpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, - unsafe { &x[0] }, incx) +pub fn stpsv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap []f32, mut x []f32, incx int) { + C.cblas_stpsv(.row_major, uplo, trans, diag, n, unsafe { &ap[0] }, unsafe { &x[0] }, + incx) } @[inline] -pub fn 
dtpsv(uplo bool, trans bool, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { - C.cblas_dtpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, unsafe { &ap[0] }, - unsafe { &x[0] }, incx) +pub fn dtpsv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + C.cblas_dtpsv(.row_major, uplo, trans, diag, n, unsafe { &ap[0] }, unsafe { &x[0] }, + incx) } @[inline] -pub fn ctpsv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { - C.cblas_ctpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +pub fn ctpsv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ctpsv(.row_major, uplo, trans, diag, n, ap, x, incx) } @[inline] -pub fn ztpsv(uplo bool, trans bool, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { - C.cblas_ztpsv(.row_major, c_uplo(uplo), c_trans(trans), diag, n, ap, x, incx) +pub fn ztpsv(uplo Uplo, trans Transpose, diag Diagonal, n int, ap voidptr, mut x voidptr, incx int) { + C.cblas_ztpsv(.row_major, uplo, trans, diag, n, ap, x, incx) } @[inline] -pub fn ssymv(uplo bool, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_ssymv(.row_major, c_uplo(uplo), n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn ssymv(uplo Uplo, n int, alpha f32, a []f32, lda int, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_ssymv(.row_major, uplo, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe { &y[0] }, incy) } @[inline] -pub fn dsymv(uplo bool, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dsymv(.row_major, c_uplo(uplo), n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, +pub fn dsymv(uplo Uplo, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dsymv(.row_major, uplo, n, alpha, unsafe { &a[0] }, lda, unsafe { &x[0] }, incx, beta, unsafe 
{ &y[0] }, incy) } @[inline] -pub fn chemv(uplo bool, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_chemv(.row_major, c_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy) +pub fn chemv(uplo Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chemv(.row_major, uplo, n, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn zhemv(uplo bool, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_zhemv(.row_major, c_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy) +pub fn zhemv(uplo Uplo, n int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhemv(.row_major, uplo, n, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn sspmv(uplo bool, n int, alpha f32, ap []f32, x []f32, incx int, beta f32, mut y []f32, incy int) { - C.cblas_sspmv(.row_major, c_uplo(uplo), n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, - incx, beta, unsafe { &y[0] }, incy) +pub fn sspmv(uplo Uplo, n int, alpha f32, ap []f32, x []f32, incx int, beta f32, mut y []f32, incy int) { + C.cblas_sspmv(.row_major, uplo, n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, incx, + beta, unsafe { &y[0] }, incy) } @[inline] -pub fn dspmv(uplo bool, n int, alpha f64, ap []f64, x []f64, incx int, beta f64, mut y []f64, incy int) { - C.cblas_dspmv(.row_major, c_uplo(uplo), n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, - incx, beta, unsafe { &y[0] }, incy) +pub fn dspmv(uplo Uplo, n int, alpha f64, ap []f64, x []f64, incx int, beta f64, mut y []f64, incy int) { + C.cblas_dspmv(.row_major, uplo, n, alpha, unsafe { &ap[0] }, unsafe { &x[0] }, incx, + beta, unsafe { &y[0] }, incy) } @[inline] -pub fn sspr(uplo bool, n int, alpha f32, x []f32, incx int, mut ap []f32) { - C.cblas_sspr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { 
&ap[0] }) +pub fn sspr(uplo Uplo, n int, alpha f32, x []f32, incx int, mut ap []f32) { + C.cblas_sspr(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &ap[0] }) } @[inline] -pub fn dspr(uplo bool, n int, alpha f64, x []f64, incx int, mut ap []f64) { - C.cblas_dspr(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &ap[0] }) +pub fn dspr(uplo Uplo, n int, alpha f64, x []f64, incx int, mut ap []f64) { + C.cblas_dspr(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &ap[0] }) } @[inline] -pub fn chpr(uplo bool, n int, alpha f32, x voidptr, incx int, mut a voidptr) { - C.cblas_chpr(.row_major, c_uplo(uplo), n, alpha, x, incx, a) +pub fn chpr(uplo Uplo, n int, alpha f32, x voidptr, incx int, mut a voidptr) { + C.cblas_chpr(.row_major, uplo, n, alpha, x, incx, a) } @[inline] -pub fn zhpr(uplo bool, n int, alpha f64, x voidptr, incx int, mut a voidptr) { - C.cblas_zhpr(.row_major, c_uplo(uplo), n, alpha, x, incx, a) +pub fn zhpr(uplo Uplo, n int, alpha f64, x voidptr, incx int, mut a voidptr) { + C.cblas_zhpr(.row_major, uplo, n, alpha, x, incx, a) } @[inline] -pub fn sspr2(uplo bool, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32) { - C.cblas_sspr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, +pub fn sspr2(uplo Uplo, n int, alpha f32, x []f32, incx int, y []f32, incy int, mut a []f32) { + C.cblas_sspr2(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &a[0] }) } @[inline] -pub fn dspr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64) { - C.cblas_dspr2(.row_major, c_uplo(uplo), n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, +pub fn dspr2(uplo Uplo, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64) { + C.cblas_dspr2(.row_major, uplo, n, alpha, unsafe { &x[0] }, incx, unsafe { &y[0] }, incy, unsafe { &a[0] }) } @[inline] -pub fn chpr2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y 
voidptr, incy int, mut ap voidptr) { - C.cblas_chpr2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, ap) +pub fn chpr2(uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut ap voidptr) { + C.cblas_chpr2(.row_major, uplo, n, alpha, x, incx, y, incy, ap) } @[inline] -pub fn zhpr2(uplo bool, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut ap voidptr) { - C.cblas_zhpr2(.row_major, c_uplo(uplo), n, alpha, x, incx, y, incy, ap) +pub fn zhpr2(uplo Uplo, n int, alpha voidptr, x voidptr, incx int, y voidptr, incy int, mut ap voidptr) { + C.cblas_zhpr2(.row_major, uplo, n, alpha, x, incx, y, incy, ap) } @[inline] -pub fn chbmv(uplo bool, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_chbmv(.row_major, c_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy) +pub fn chbmv(uplo Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chbmv(.row_major, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn zhbmv(uplo bool, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_zhbmv(.row_major, c_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy) +pub fn zhbmv(uplo Uplo, n int, k int, alpha voidptr, a voidptr, lda int, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhbmv(.row_major, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy) } @[inline] -pub fn chpmv(uplo bool, n int, alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_chpmv(.row_major, c_uplo(uplo), n, alpha, ap, x, incx, beta, y, incy) +pub fn chpmv(uplo Uplo, n int, alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_chpmv(.row_major, uplo, n, alpha, ap, x, incx, beta, y, incy) } @[inline] -pub fn zhpmv(uplo bool, n int, 
alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { - C.cblas_zhpmv(.row_major, c_uplo(uplo), n, alpha, ap, x, incx, beta, y, incy) +pub fn zhpmv(uplo Uplo, n int, alpha voidptr, ap voidptr, x voidptr, incx int, beta voidptr, mut y voidptr, incy int) { + C.cblas_zhpmv(.row_major, uplo, n, alpha, ap, x, incx, beta, y, incy) } @[inline] -pub fn ssyrk(uplo bool, trans bool, n int, k int, alpha f32, a []f32, lda int, beta f32, mut c []f32, ldc int) { - C.cblas_ssyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, - lda, beta, unsafe { &c[0] }, ldc) +pub fn ssyrk(uplo Uplo, trans Transpose, n int, k int, alpha f32, a []f32, lda int, beta f32, mut c []f32, ldc int) { + C.cblas_ssyrk(.row_major, uplo, trans, n, k, alpha, unsafe { &a[0] }, lda, beta, unsafe { &c[0] }, + ldc) } @[inline] -pub fn dsyrk(uplo bool, trans bool, n int, k int, alpha f64, a []f64, lda int, beta f64, mut c []f64, ldc int) { - C.cblas_dsyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, - lda, beta, unsafe { &c[0] }, ldc) +pub fn dsyrk(uplo Uplo, trans Transpose, n int, k int, alpha f64, a []f64, lda int, beta f64, mut c []f64, ldc int) { + C.cblas_dsyrk(.row_major, uplo, trans, n, k, alpha, unsafe { &a[0] }, lda, beta, unsafe { &c[0] }, + ldc) } @[inline] -pub fn csyrk(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { - C.cblas_csyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, - c, ldc) +pub fn csyrk(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_csyrk(.row_major, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) } @[inline] -pub fn zsyrk(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { - C.cblas_zsyrk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, - c, ldc) +pub fn 
zsyrk(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_zsyrk(.row_major, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) } @[inline] -pub fn ssyr2k(uplo bool, trans bool, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut c []f32, ldc int) { - C.cblas_ssyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &c[0] }, ldc) +pub fn ssyr2k(uplo Uplo, trans Transpose, n int, k int, alpha f32, a []f32, lda int, b []f32, ldb int, beta f32, mut c []f32, ldc int) { + C.cblas_ssyr2k(.row_major, uplo, trans, n, k, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, + ldb, beta, unsafe { &c[0] }, ldc) } @[inline] -pub fn dsyr2k(uplo bool, trans bool, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut c []f64, ldc int) { - C.cblas_dsyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &c[0] }, ldc) +pub fn dsyr2k(uplo Uplo, trans Transpose, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut c []f64, ldc int) { + C.cblas_dsyr2k(.row_major, uplo, trans, n, k, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, + ldb, beta, unsafe { &c[0] }, ldc) } @[inline] -pub fn csyr2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { - C.cblas_csyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, - beta, c, ldc) +pub fn csyr2k(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_csyr2k(.row_major, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] -pub fn zsyr2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { - 
C.cblas_zsyr2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, - beta, c, ldc) +pub fn zsyr2k(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_zsyr2k(.row_major, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] -pub fn strmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { - C.cblas_strmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, +pub fn strmm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_strmm(.row_major, side, uplo, trans, diag, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, ldb) } @[inline] -pub fn dtrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { - C.cblas_dtrmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, +pub fn dtrmm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_dtrmm(.row_major, side, uplo, trans, diag, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, ldb) } @[inline] -pub fn ctrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { - C.cblas_ctrmm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, - lda, b, ldb) +pub fn ctrmm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ctrmm(.row_major, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) } @[inline] -pub fn ztrmm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { - C.cblas_ztrmm(.row_major, side, c_uplo(uplo), 
c_trans(trans), diag, m, n, alpha, a, - lda, b, ldb) +pub fn ztrmm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ztrmm(.row_major, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) } @[inline] -pub fn strsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { - C.cblas_strsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, +pub fn strsm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_strsm(.row_major, side, uplo, trans, diag, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, ldb) } @[inline] -pub fn dtrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { - C.cblas_dtrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, unsafe { &a[0] }, +pub fn dtrsm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_dtrsm(.row_major, side, uplo, trans, diag, m, n, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, ldb) } @[inline] -pub fn ctrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { - C.cblas_ctrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, - lda, b, ldb) +pub fn ctrsm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ctrsm(.row_major, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) } @[inline] -pub fn ztrsm(side Side, uplo bool, trans bool, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { - C.cblas_ztrsm(.row_major, side, c_uplo(uplo), c_trans(trans), diag, m, n, alpha, a, - lda, b, ldb) +pub fn 
ztrsm(side Side, uplo Uplo, trans Transpose, diag Diagonal, m int, n int, alpha voidptr, a voidptr, lda int, mut b voidptr, ldb int) { + C.cblas_ztrsm(.row_major, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) } @[inline] -pub fn chemm(side Side, uplo bool, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { - C.cblas_chemm(.row_major, side, c_uplo(uplo), m, n, alpha, a, lda, b, ldb, beta, c, - ldc) +pub fn chemm(side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_chemm(.row_major, side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] -pub fn zhemm(side Side, uplo bool, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { - C.cblas_zhemm(.row_major, side, c_uplo(uplo), m, n, alpha, a, lda, b, ldb, beta, c, - ldc) +pub fn zhemm(side Side, uplo Uplo, m int, n int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta voidptr, mut c voidptr, ldc int) { + C.cblas_zhemm(.row_major, side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] -pub fn cherk(uplo bool, trans bool, n int, k int, alpha f32, a voidptr, lda int, beta f32, mut c voidptr, ldc int) { - C.cblas_cherk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, - c, ldc) +pub fn cherk(uplo Uplo, trans Transpose, n int, k int, alpha f32, a voidptr, lda int, beta f32, mut c voidptr, ldc int) { + C.cblas_cherk(.row_major, uplo, trans, n, k, alpha, a, lda, beta, c, ldc) } @[inline] -pub fn zherk(uplo bool, trans bool, n int, k int, alpha f64, a voidptr, lda int, beta f64, mut c voidptr, ldc int) { - C.cblas_zherk(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, beta, - c, ldc) +pub fn zherk(uplo Uplo, trans Transpose, n int, k int, alpha f64, a voidptr, lda int, beta f64, mut c voidptr, ldc int) { + C.cblas_zherk(.row_major, uplo, trans, n, k, alpha, 
a, lda, beta, c, ldc) } @[inline] -pub fn cher2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f32, mut c voidptr, ldc int) { - C.cblas_cher2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, - beta, c, ldc) +pub fn cher2k(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f32, mut c voidptr, ldc int) { + C.cblas_cher2k(.row_major, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] -pub fn zher2k(uplo bool, trans bool, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f64, mut c voidptr, ldc int) { - C.cblas_zher2k(.row_major, c_uplo(uplo), c_trans(trans), n, k, alpha, a, lda, b, ldb, - beta, c, ldc) +pub fn zher2k(uplo Uplo, trans Transpose, n int, k int, alpha voidptr, a voidptr, lda int, b voidptr, ldb int, beta f64, mut c voidptr, ldc int) { + C.cblas_zher2k(.row_major, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc) } @[inline] @@ -986,47 +970,45 @@ pub fn zaxpby(n int, alpha voidptr, x voidptr, incx int, beta voidptr, mut y voi } @[inline] -pub fn somatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { - C.cblas_somatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, - unsafe { &b[0] }, ldb) +pub fn somatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha f32, a []f32, lda int, mut b []f32, ldb int) { + C.cblas_somatcopy(order, trans, rows, cols, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, + ldb) } @[inline] -pub fn domatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { - C.cblas_domatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, - unsafe { &b[0] }, ldb) +pub fn domatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha f64, a []f64, lda int, mut b []f64, ldb int) { + C.cblas_domatcopy(order, 
trans, rows, cols, alpha, unsafe { &a[0] }, lda, unsafe { &b[0] }, + ldb) } @[inline] -pub fn comatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f32, a &f32, lda int, mut b &f32, ldb int) { - C.cblas_comatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, b, ldb) +pub fn comatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha &f32, a &f32, lda int, mut b &f32, ldb int) { + C.cblas_comatcopy(order, trans, rows, cols, alpha, a, lda, b, ldb) } @[inline] -pub fn zomatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f64, a &f64, lda int, mut b &f64, ldb int) { - C.cblas_zomatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, b, ldb) +pub fn zomatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha &f64, a &f64, lda int, mut b &f64, ldb int) { + C.cblas_zomatcopy(order, trans, rows, cols, alpha, a, lda, b, ldb) } @[inline] -pub fn simatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f32, mut a []f32, lda int, ldb int) { - C.cblas_simatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, - ldb) +pub fn simatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha f32, mut a []f32, lda int, ldb int) { + C.cblas_simatcopy(order, trans, rows, cols, alpha, unsafe { &a[0] }, lda, ldb) } @[inline] -pub fn dimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha f64, mut a []f64, lda int, ldb int) { - C.cblas_dimatcopy(order, c_trans(trans), rows, cols, alpha, unsafe { &a[0] }, lda, - ldb) +pub fn dimatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha f64, mut a []f64, lda int, ldb int) { + C.cblas_dimatcopy(order, trans, rows, cols, alpha, unsafe { &a[0] }, lda, ldb) } @[inline] -pub fn cimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f32, mut a &f32, lda int, ldb int) { - C.cblas_cimatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, ldb) +pub fn cimatcopy(order MemoryLayout, trans Transpose, rows int, cols 
int, alpha &f32, mut a &f32, lda int, ldb int) { + C.cblas_cimatcopy(order, trans, rows, cols, alpha, a, lda, ldb) } @[inline] -pub fn zimatcopy(order MemoryLayout, trans bool, rows int, cols int, alpha &f64, mut a &f64, lda int, ldb int) { - C.cblas_zimatcopy(order, c_trans(trans), rows, cols, alpha, a, lda, ldb) +pub fn zimatcopy(order MemoryLayout, trans Transpose, rows int, cols int, alpha &f64, mut a &f64, lda int, ldb int) { + C.cblas_zimatcopy(order, trans, rows, cols, alpha, a, lda, ldb) } @[inline] @@ -1052,7 +1034,7 @@ pub fn zgeadd(order MemoryLayout, rows int, cols int, alpha &f64, a &f64, lda in } @[inline] -pub fn dgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { - C.cblas_dgemm(.row_major, c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, unsafe { &a[0] }, - lda, unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) +pub fn dgemm(trans_a Transpose, trans_b Transpose, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { + C.cblas_dgemm(.row_major, trans_a, trans_b, m, n, k, alpha, unsafe { &a[0] }, lda, + unsafe { &b[0] }, ldb, beta, unsafe { &cc[0] }, ldc) } diff --git a/blas/oblas_notd_vsl_blas_cblas.v b/blas/oblas_notd_vsl_blas_cblas.v index 8552dde50..f05117128 100644 --- a/blas/oblas_notd_vsl_blas_cblas.v +++ b/blas/oblas_notd_vsl_blas_cblas.v @@ -47,8 +47,8 @@ pub fn dscal(n int, alpha f64, mut x []f64, incx int) { } @[inline] -pub fn dgemv(trans bool, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { - blas64.dgemv(c_trans(trans), m, n, alpha, a, lda, x, incx, beta, mut y, incy) +pub fn dgemv(trans Transpose, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + blas64.dgemv(trans, m, n, alpha, a, lda, x, incx, beta, mut y, incy) } @[inline] @@ -57,27 +57,26 @@ pub fn dger(m int, n int, alpha f64, x []f64, incx int, y 
[]f64, incy int, mut a } @[inline] -pub fn dtrsv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - blas64.dtrsv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) +pub fn dtrsv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtrsv(uplo, trans_a, diag, n, a, lda, mut x, incx) } @[inline] -pub fn dtrmv(uplo bool, trans_a bool, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { - blas64.dtrmv(c_uplo(uplo), c_trans(trans_a), diag, n, a, lda, mut x, incx) +pub fn dtrmv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtrmv(uplo, trans_a, diag, n, a, lda, mut x, incx) } @[inline] -pub fn dsyr(uplo bool, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { - blas64.dsyr(c_uplo(uplo), n, alpha, x, incx, mut a, lda) +pub fn dsyr(uplo Uplo, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { + blas64.dsyr(uplo, n, alpha, x, incx, mut a, lda) } @[inline] -pub fn dsyr2(uplo bool, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { - blas64.dsyr2(c_uplo(uplo), n, alpha, x, incx, y, incy, mut a, lda) +pub fn dsyr2(uplo Uplo, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { + blas64.dsyr2(uplo, n, alpha, x, incx, y, incy, mut a, lda) } @[inline] -pub fn dgemm(trans_a bool, trans_b bool, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { - blas64.dgemm(c_trans(trans_a), c_trans(trans_b), m, n, k, alpha, a, lda, b, ldb, beta, mut - cc, ldc) +pub fn dgemm(trans_a Transpose, trans_b Transpose, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { + blas64.dgemm(trans_a, trans_b, m, n, k, alpha, a, lda, b, ldb, beta, mut cc, ldc) } diff --git a/float/float64/gemv_test.v b/float/float64/gemv_test.v index 34a383ee7..c84060ffd 100644 --- 
a/float/float64/gemv_test.v +++ b/float/float64/gemv_test.v @@ -527,13 +527,13 @@ fn test_gemv() { } } -fn dgemvcomp(mut test DgemvCase, trans bool, case DgemvSubcase) { +fn dgemvcomp(mut test DgemvCase, trans Transpose, case DgemvSubcase) { tol := 1e-15 x_gd_val, y_gd_val, a_gd_val := 0.5, 1.5, 10 gd_ln := 4 - test_x := if trans { test.y } else { test.x } - test_y := if trans { test.x } else { test.y } + test_x := if trans == .trans { test.y } else { test.x } + test_y := if trans == .trans { test.x } else { test.y } mut xg, mut yg := guard_vector(test_x, x_gd_val, gd_ln), guard_vector(test_y, y_gd_val, gd_ln) @@ -543,7 +543,7 @@ fn dgemvcomp(mut test DgemvCase, trans bool, case DgemvSubcase) { lda := u32(test.n) - if trans { + if trans == .trans { gemv_t(u32(test.m), u32(test.n), case.alpha, a, lda, x, 1, case.beta, mut y, 1) } else { gemv_n(u32(test.m), u32(test.n), case.alpha, a, lda, x, 1, case.beta, mut y, 1) @@ -577,7 +577,7 @@ fn dgemvcomp(mut test DgemvCase, trans bool, case DgemvSubcase) { ag = guard_vector(test.a, a_gd_val, gd_ln) a = ag[gd_ln..ag.len - gd_ln] - if trans { + if trans == .trans { gemv_t(u32(test.m), u32(test.n), case.alpha, a, lda, x, u32(inc.x), case.beta, mut y, u32(inc.y)) } else { diff --git a/la/blas.v b/la/blas.v index 03b88060a..d472ad0af 100644 --- a/la/blas.v +++ b/la/blas.v @@ -127,7 +127,7 @@ pub fn matrix_vector_mul[T](alpha T, a &Matrix[T], u []T) []T { } return v } - blas.dgemv(false, a.m, a.n, alpha, a.data, a.n, u, 1, 0.0, mut v, 1) + blas.dgemv(.no_trans, a.m, a.n, alpha, a.data, a.n, u, 1, 0.0, mut v, 1) return v } $else { mut v := []T{len: a.m} @@ -157,7 +157,7 @@ pub fn matrix_tr_vector_mul[T](alpha T, a &Matrix[T], u []T) []T { } return v } - blas.dgemv(true, a.m, a.n, alpha, a.data, a.n, u, 1, 0.0, mut v, 1) + blas.dgemv(.trans, a.m, a.n, alpha, a.data, a.n, u, 1, 0.0, mut v, 1) return v } $else { mut v := []T{len: a.n} @@ -208,7 +208,7 @@ pub fn vector_vector_tr_mul[T](alpha T, u []T, v []T) &Matrix[T] { // pub fn 
matrix_vector_mul_add(alpha f64, a &Matrix[f64], u []f64) []f64 { mut v := []f64{len: a.m} - blas.dgemv(false, a.m, a.n, alpha, a.data, a.m, u, 1, 1.0, mut v, 1) + blas.dgemv(.no_trans, a.m, a.n, alpha, a.data, a.m, u, 1, 1.0, mut v, 1) return v } @@ -228,7 +228,7 @@ pub fn matrix_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix } return } - blas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.m, b.data, b.m, 0.0, mut + blas.dgemm(.no_trans, .no_trans, a.m, b.n, a.n, alpha, a.data, a.m, b.data, b.m, 0.0, mut c.data, c.m) } @@ -248,8 +248,8 @@ pub fn matrix_tr_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat } return } - blas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.m, b.data, b.m, 0.0, mut c.data, - c.m) + blas.dgemm(.trans, .no_trans, a.n, b.n, a.m, alpha, a.data, a.m, b.data, b.m, 0.0, mut + c.data, c.m) } // matrix_matrix_tr_mul returns the matrix multiplication (scaled) with transposed(b) @@ -257,8 +257,8 @@ pub fn matrix_tr_matrix_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c := alpha⋅a⋅bᵀ ⇒ cij := alpha * aik * bjk // pub fn matrix_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, - c.m) + blas.dgemm(.no_trans, .trans, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 0.0, mut + c.data, c.m) } // matrix_tr_matrix_tr_mul returns the matrix multiplication (scaled) with transposed(a) and transposed(b) @@ -266,8 +266,8 @@ pub fn matrix_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c := alpha⋅aᵀ⋅bᵀ ⇒ cij := alpha * aki * bjk // pub fn matrix_tr_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut c.data, - c.m) + blas.dgemm(.trans, .trans, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 0.0, mut + c.data, c.m) } // matrix_matrix_muladd returns the matrix 
multiplication (scaled) @@ -275,7 +275,7 @@ pub fn matrix_tr_matrix_tr_mul(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅a⋅b ⇒ cij += alpha * aik * bkj // pub fn matrix_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(false, false, a.m, b.n, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut + blas.dgemm(.no_trans, .no_trans, a.m, b.n, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, c.m) } @@ -284,8 +284,8 @@ pub fn matrix_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Mat // c += alpha⋅aᵀ⋅b ⇒ cij += alpha * aki * bkj // pub fn matrix_tr_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(true, false, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, - c.m) + blas.dgemm(.trans, .no_trans, a.n, b.n, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut + c.data, c.m) } // matrix_matrix_tr_muladd returns the matrix multiplication (scaled) with transposed(b) @@ -293,8 +293,8 @@ pub fn matrix_tr_matrix_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅a⋅bᵀ ⇒ cij += alpha * aik * bjk // pub fn matrix_matrix_tr_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(false, true, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, - c.m) + blas.dgemm(.no_trans, .trans, a.m, b.m, a.n, alpha, a.data, a.n, b.data, b.m, 1.0, mut + c.data, c.m) } // matrix_tr_matrix_tr_mul_add returns the matrix multiplication (scaled) with transposed(a) and transposed(b) @@ -302,8 +302,8 @@ pub fn matrix_matrix_tr_muladd(mut c Matrix[f64], alpha f64, a &Matrix[f64], b & // c += alpha⋅aᵀ⋅bᵀ ⇒ cij += alpha * aki * bjk // pub fn matrix_tr_matrix_tr_mul_add(mut c Matrix[f64], alpha f64, a &Matrix[f64], b &Matrix[f64]) { - blas.dgemm(true, true, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut c.data, - c.m) + blas.dgemm(.trans, .trans, a.n, b.m, a.m, alpha, a.data, a.n, b.data, b.m, 1.0, mut + c.data, 
c.m) } // matrix_add adds the scaled components of two matrices diff --git a/lapack/lapack64/dgetrf.v b/lapack/lapack64/dgetrf.v index 5a1dc8d29..01f7968f8 100644 --- a/lapack/lapack64/dgetrf.v +++ b/lapack/lapack64/dgetrf.v @@ -79,13 +79,13 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { dlaswp(j, mut slice2, lda, j, j + jb, mut slice_ipiv2, 1) mut slice3 := unsafe { a[j * lda + j + jb..] } - blas.dtrsm(.left, false, false, .unit, jb, n - j - jb, 1, a[j * lda + j..], + blas.dtrsm(.left, .lower, .no_trans, .unit, jb, n - j - jb, 1, a[j * lda + j..], lda, mut slice3, lda) if j + jb < m { mut slice4 := unsafe { a[(j + jb) * lda + j + jb..] } - blas.dgemm(false, false, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + j..], - lda, a[j * lda + j + jb..], lda, 1, mut slice4, lda) + blas.dgemm(.no_trans, .no_trans, m - j - jb, n - j - jb, jb, -1, a[(j + jb) * lda + + j..], lda, a[j * lda + j + jb..], lda, 1, mut slice4, lda) } } } diff --git a/lapack/lapack64/dgetrs.v b/lapack/lapack64/dgetrs.v index 9e1424600..fa40169d5 100644 --- a/lapack/lapack64/dgetrs.v +++ b/lapack/lapack64/dgetrs.v @@ -52,15 +52,15 @@ pub fn dgetrs(trans blas.Transpose, n int, nrhs int, mut a []f64, lda int, mut i // Solve A * X = B. dlaswp(nrhs, mut b, ldb, 0, n - 1, mut ipiv, 1) // Solve L * X = B, overwriting B with X. - blas.dtrsm(.left, false, false, .unit, n, nrhs, 1, a, lda, mut b, ldb) + blas.dtrsm(.left, .lower, .no_trans, .unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve U * X = B, overwriting B with X. - blas.dtrsm(.left, true, false, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) + blas.dtrsm(.left, .upper, .no_trans, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) } // Solve Aᵀ * X = B. // Solve Uᵀ * X = B, overwriting B with X. - blas.dtrsm(.left, true, true, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) + blas.dtrsm(.left, .upper, .trans, .non_unit, n, nrhs, 1, a, lda, mut b, ldb) // Solve Lᵀ * X = B, overwriting B with X. 
- blas.dtrsm(.left, false, true, .unit, n, nrhs, 1, a, lda, mut b, ldb) + blas.dtrsm(.left, .lower, .trans, .unit, n, nrhs, 1, a, lda, mut b, ldb) dlaswp(nrhs, mut b, ldb, 0, n - 1, mut ipiv, -1) } diff --git a/lapack/lapack_d_vsl_lapack_lapacke.v b/lapack/lapack_d_vsl_lapack_lapacke.v index 9bc8314d9..bd96129ae 100644 --- a/lapack/lapack_d_vsl_lapack_lapacke.v +++ b/lapack/lapack_d_vsl_lapack_lapacke.v @@ -47,8 +47,8 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, if ipiv.len != n { errors.vsl_panic('ipiv.len must be equal to n. ${ipiv.len} != ${n}\n', .efailed) } - info := C.LAPACKE_dgesv(.row_major, n, nrhs, unsafe { &a[0] }, lda, unsafe { &ipiv[0] }, unsafe { &b[0] }, - ldb) + info := C.LAPACKE_dgesv(.row_major, n, nrhs, unsafe { &a[0] }, lda, unsafe { &ipiv[0] }, + unsafe { &b[0] }, ldb) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } @@ -141,9 +141,9 @@ pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { // where U is an upper triangular matrix and L is lower triangular. // // This is the block version of the algorithm, calling Level 3 BLAS. 
-pub fn dpotrf(uplo bool, n int, mut a []f64, lda int) { +pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { unsafe { - info := C.LAPACKE_dpotrf(.row_major, blas.c_uplo(uplo), n, &a[0], lda) + info := C.LAPACKE_dpotrf(.row_major, uplo, n, &a[0], lda) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } From c66401ffaacc74a8c1ab1b281215f88688b3eff6 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:35:14 -0300 Subject: [PATCH 21/33] refactor: Update gemv_test.v to use named arguments in dgemvcomp calls --- float/float64/gemv_test.v | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/float/float64/gemv_test.v b/float/float64/gemv_test.v index c84060ffd..bd2749b64 100644 --- a/float/float64/gemv_test.v +++ b/float/float64/gemv_test.v @@ -518,11 +518,11 @@ struct DgemvSubcase { fn test_gemv() { for mut test in float64.dgemv_tests { for case in test.no_trans { - dgemvcomp(mut test, false, case) + dgemvcomp(mut test, .no_trans, case) } for case in test.trans { - dgemvcomp(mut test, true, case) + dgemvcomp(mut test, .trans, case) } } } From d399e1a9d44d11acdb07d7652728c42b94911da8 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:37:17 -0300 Subject: [PATCH 22/33] refactor: Update gemv_test.v to use named arguments in dgemvcomp calls --- float/float64/gemv_test.v | 1 + 1 file changed, 1 insertion(+) diff --git a/float/float64/gemv_test.v b/float/float64/gemv_test.v index bd2749b64..efe56858c 100644 --- a/float/float64/gemv_test.v +++ b/float/float64/gemv_test.v @@ -1,6 +1,7 @@ module float64 import math +import vsl.blas const dgemv_tests = [ DgemvCase{ // 1x1 From 1d5e441f6a63470226ebf237b602d9a9851f471f Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Tue, 18 Jun 2024 02:38:35 -0300 Subject: [PATCH 23/33] refactor: Update gemv_test.v to use named arguments in dgemvcomp calls --- float/float64/gemv_test.v | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git 
a/float/float64/gemv_test.v b/float/float64/gemv_test.v index efe56858c..34a383ee7 100644 --- a/float/float64/gemv_test.v +++ b/float/float64/gemv_test.v @@ -1,7 +1,6 @@ module float64 import math -import vsl.blas const dgemv_tests = [ DgemvCase{ // 1x1 @@ -519,22 +518,22 @@ struct DgemvSubcase { fn test_gemv() { for mut test in float64.dgemv_tests { for case in test.no_trans { - dgemvcomp(mut test, .no_trans, case) + dgemvcomp(mut test, false, case) } for case in test.trans { - dgemvcomp(mut test, .trans, case) + dgemvcomp(mut test, true, case) } } } -fn dgemvcomp(mut test DgemvCase, trans Transpose, case DgemvSubcase) { +fn dgemvcomp(mut test DgemvCase, trans bool, case DgemvSubcase) { tol := 1e-15 x_gd_val, y_gd_val, a_gd_val := 0.5, 1.5, 10 gd_ln := 4 - test_x := if trans == .trans { test.y } else { test.x } - test_y := if trans == .trans { test.x } else { test.y } + test_x := if trans { test.y } else { test.x } + test_y := if trans { test.x } else { test.y } mut xg, mut yg := guard_vector(test_x, x_gd_val, gd_ln), guard_vector(test_y, y_gd_val, gd_ln) @@ -544,7 +543,7 @@ fn dgemvcomp(mut test DgemvCase, trans Transpose, case DgemvSubcase) { lda := u32(test.n) - if trans == .trans { + if trans { gemv_t(u32(test.m), u32(test.n), case.alpha, a, lda, x, 1, case.beta, mut y, 1) } else { gemv_n(u32(test.m), u32(test.n), case.alpha, a, lda, x, 1, case.beta, mut y, 1) @@ -578,7 +577,7 @@ fn dgemvcomp(mut test DgemvCase, trans Transpose, case DgemvSubcase) { ag = guard_vector(test.a, a_gd_val, gd_ln) a = ag[gd_ln..ag.len - gd_ln] - if trans == .trans { + if trans { gemv_t(u32(test.m), u32(test.n), case.alpha, a, lda, x, u32(inc.x), case.beta, mut y, u32(inc.y)) } else { From 9f46519d6e0031e248977d5bd3bae10ad8b62285 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sat, 22 Jun 2024 02:50:40 -0300 Subject: [PATCH 24/33] refactor: Update conversions.v, dgetf2.v, dsyev.v, and lapack_notd_vsl_lapack_lapacke.v - Add functions uplo_from_bool and uplo_to_bool to 
conversions.v - Remove unused dlamch_s function from dgetf2.v - Update dsyev.v to include additional error handling and scaling of matrix - Update dpotrf function in lapack_notd_vsl_lapack_lapacke.v to use uplo_from_bool for uplo parameter --- blas/conversions.v | 10 ++ lapack/lapack64/dgetf2.v | 8 +- lapack/lapack64/dlansy.v | 122 ++++++++++++++++++++++++ lapack/lapack64/dlassq.v | 120 +++++++++++++++++++++++ lapack/lapack64/dsyev.v | 108 ++++++++++++++++----- lapack/lapack64/lapack64.v | 50 ++++++++++ lapack/lapack_notd_vsl_lapack_lapacke.v | 4 +- 7 files changed, 391 insertions(+), 31 deletions(-) create mode 100644 lapack/lapack64/dlansy.v create mode 100644 lapack/lapack64/dlassq.v create mode 100644 lapack/lapack64/lapack64.v diff --git a/blas/conversions.v b/blas/conversions.v index b421fa60c..18ace678c 100644 --- a/blas/conversions.v +++ b/blas/conversions.v @@ -21,6 +21,16 @@ pub type Diagonal = blas64.Diagonal // Side is used to specify whether a matrix is on the left or right side in a matrix-matrix multiplication. pub type Side = blas64.Side +// uplo_from_bool converts a boolean to Uplo. +pub fn uplo_from_bool(uplo bool) Uplo { + return if uplo { .upper } else { .lower } +} + +// uplo_to_bool converts Uplo to a boolean. +pub fn uplo_to_bool(uplo Uplo) bool { + return uplo == .upper +} + // slice_to_col_major converts nested slice into an array representing a col-major matrix // // _**NOTE**: make sure to have at least 1x1 item_ diff --git a/lapack/lapack64/dgetf2.v b/lapack/lapack64/dgetf2.v index 2748c78d7..9aa920500 100644 --- a/lapack/lapack64/dgetf2.v +++ b/lapack/lapack64/dgetf2.v @@ -24,7 +24,7 @@ pub fn dgetf2(m int, n int, mut a []f64, lda int, mut ipiv []int) { panic(bad_len_ipiv) } - sfmin := dlamch_s() + sfmin := dlamch_s for j := 0; j < mn; j++ { // Find a pivot and test for singularity. 
@@ -58,9 +58,3 @@ pub fn dgetf2(m int, n int, mut a []f64, lda int, mut ipiv []int) { } } } - -fn dlamch_s() f64 { - // Returns the safe minimum value (sfmin). - // This value is used as a threshold for detecting small values in the matrix. - return math.ldexp(1.0, -1022) // Smallest positive normal number. -} diff --git a/lapack/lapack64/dlansy.v b/lapack/lapack64/dlansy.v new file mode 100644 index 000000000..04dd3284f --- /dev/null +++ b/lapack/lapack64/dlansy.v @@ -0,0 +1,122 @@ +module lapack64 + +import math +import vsl.blas + +// dlansy returns the value of the specified norm of an n×n symmetric matrix. +// If norm == MatrixNorm.max_column_sum or norm == MatrixNorm.max_row_sum, work must have length +// at least n, otherwise work is unused. +pub fn dlansy(norm MatrixNorm, uplo blas.Uplo, n int, a []f64, lda int, mut work []f64) f64 { + if norm != .max_row_sum && norm != .max_column_sum && norm != .frobenius && norm != .max_abs { + panic(lapack64.bad_norm) + } + if uplo != .upper && uplo != .lower { + panic(lapack64.bad_uplo) + } + if n < 0 { + panic('lapack: n < 0') + } + if lda < math.max(1, n) { + panic(lapack64.bad_ld_a) + } + + // Quick return if possible. + if n == 0 { + return 0.0 + } + + if a.len < (n-1) * lda + n { + panic(lapack64.short_a) + } + if (norm == .max_column_sum || norm == .max_row_sum) && work.len < n { + panic(lapack64.short_work) + } + + match norm { + .max_abs { + if uplo == .upper { + mut max := 0.0 + for i in 0 .. n { + for j in i .. n { + v := math.abs(a[i * lda + j]) + if math.is_nan(v) { + return math.nan() + } + if v > max { + max = v + } + } + } + return max + } + mut max := 0.0 + for i in 0 .. n { + for j in 0 .. i + 1 { + v := math.abs(a[i * lda + j]) + if math.is_nan(v) { + return math.nan() + } + if v > max { + max = v + } + } + } + return max + } + .max_row_sum, .max_column_sum { + // A symmetric matrix has the same 1-norm and ∞-norm. + for i in 0 .. n { + work[i] = 0.0 + } + if uplo == .upper { + for i in 0 .. 
n { + work[i] += math.abs(a[i * lda + i]) + for j in i + 1 .. n { + v := math.abs(a[i * lda + j]) + work[i] += v + work[j] += v + } + } + } else { + for i in 0 .. n { + for j in 0 .. i { + v := math.abs(a[i * lda + j]) + work[i] += v + work[j] += v + } + work[i] += math.abs(a[i * lda + i]) + } + } + mut max := 0.0 + for i in 0 .. n { + v := work[i] + if math.is_nan(v) { + return math.nan() + } + if v > max { + max = v + } + } + return max + } + else { + // blas.frobenius: + mut scale := 0.0 + mut sum := 1.0 + // Sum off-diagonals. + if uplo == .upper { + for i in 0 .. n - 1 { + scale, sum = dlassq(n - i - 1, a[i * lda + i + 1..], 1, scale, sum) + } + } else { + for i in 1 .. n { + scale, sum = dlassq(i, a[i * lda..], 1, scale, sum) + } + } + sum *= 2.0 + // Sum diagonal. + scale, sum = dlassq(n, a, lda + 1, scale, sum) + return scale * math.sqrt(sum) + } + } +} diff --git a/lapack/lapack64/dlassq.v b/lapack/lapack64/dlassq.v new file mode 100644 index 000000000..c29d74dc0 --- /dev/null +++ b/lapack/lapack64/dlassq.v @@ -0,0 +1,120 @@ +module lapack64 + +import math + +// dlassq updates a sum of squares represented in scaled form. It returns +// the values scl and smsq such that +// +// scl^2*smsq = X[0]^2 + ... + X[n-1]^2 + scale^2*sumsq +// +// The value of sumsq is assumed to be non-negative. 
+pub fn dlassq(n int, x []f64, incx int, scale f64, sumsq f64) (f64, f64) { + if n < 0 { + panic('lapack: n < 0') + } + if incx <= 0 { + panic('lapack: increment not one or negative one') + } + if x.len < 1 + (n - 1) * incx { + panic('lapack: insufficient length of x') + } + + if math.is_nan(scale) || math.is_nan(sumsq) { + return scale, sumsq + } + + mut scl := scale + mut smsq := sumsq + + if smsq == 0.0 { + scl = 1.0 + } + if scl == 0.0 { + scl = 1.0 + smsq = 0.0 + } + + if n == 0 { + return scl, smsq + } + + // Compute the sum of squares in 3 accumulators: + // - abig: sum of squares scaled down to avoid overflow + // - asml: sum of squares scaled up to avoid underflow + // - amed: sum of squares that do not require scaling + // The thresholds and multipliers are: + // - values bigger than dtbig are scaled down by dsbig + // - values smaller than dtsml are scaled up by dssml + mut is_big := false + mut asml, mut amed, mut abig := 0.0, 0.0, 0.0 + mut ix := 0 + for _ in 0 .. n { + mut ax := math.abs(x[ix]) + if ax > dtbig { + ax *= dsbig + abig += ax * ax + is_big = true + } else if ax < dtsml { + if !is_big { + ax *= dssml + asml += ax * ax + } + } else { + amed += ax * ax + } + ix += incx + } + // Put the existing sum of squares into one of the accumulators. + if smsq > 0.0 { + ax := scl * math.sqrt(smsq) + if ax > dtbig { + if scl > 1.0 { + scl *= dsbig + abig += scl * scl * smsq + } else { + // sumsq > dtbig^2 => (dsbig * (dsbig * sumsq)) is representable. + abig += scl * scl * dsbig * dsbig * smsq + } + } else if ax < dtsml { + if !is_big { + if scl < 1.0 { + scl *= dssml + asml += scl * scl * smsq + } else { + // sumsq < dtsml^2 => (dssml * (dssml * sumsq)) is representable. + asml += scl * scl * dssml * dssml * smsq + } + } + } else { + amed += scl * scl * smsq + } + } + // Combine abig and amed or amed and asml if more than one accumulator was used. 
+ if abig > 0.0 { + // Combine abig and amed: + if amed > 0.0 || math.is_nan(amed) { + abig += amed * dsbig * dsbig + } + scl = 1.0 / dsbig + smsq = abig + } else if asml > 0.0 { + // Combine amed and asml: + if amed > 0.0 || math.is_nan(amed) { + amed = math.sqrt(amed) + asml = math.sqrt(asml) / dssml + mut ymin, mut ymax := asml, amed + if asml > amed { + ymin, ymax = amed, asml + } + scl = 1.0 + smsq = ymax * ymax * (1.0 + (ymin / ymax) * (ymin / ymax)) + } else { + scl = 1.0 / dssml + smsq = asml + } + } else { + scl = 1.0 + smsq = amed + } + return scl, smsq +} diff --git a/lapack/lapack64/dsyev.v b/lapack/lapack64/dsyev.v index a48998d2a..a65f019fa 100644 --- a/lapack/lapack64/dsyev.v +++ b/lapack/lapack64/dsyev.v @@ -3,36 +3,100 @@ module lapack64 import math import vsl.blas -// dsyev computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. -pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, w []f64) int { +pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, mut w []f64, mut work []f64, lwork int) { + if jobz != .ev_none && jobz != .ev_compute { + panic(bad_ev_job) + } + if uplo != .upper && uplo != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if lwork < math.max(1, 3 * n - 1) && lwork != -1 { + panic(bad_l_work) + } + if work.len < math.max(1, lwork) { + panic(short_work) + } + + // Quick return if possible. 
if n == 0 { - return 0 + return } - mut info := 0 - if jobz != .ev_none && jobz != .ev_compute { - info = -1 - } else if uplo != .upper && uplo != .lower { - info = -2 - } else if n < 0 { - info = -3 - } else if lda < math.max(1, n) { - info = -5 + opts := if uplo == .upper { 'U' } else { 'L' } + nb := ilaenv(1, 'DSYTRD', opts, n, -1, -1, -1) + lworkopt := math.max(1, (nb + 2) * n) + if lwork == -1 { + work[0] = f64(lworkopt) + return } - if info != 0 { - return info + if a.len < (n - 1) * lda + n { + panic(short_a) + } + if w.len < n { + panic(short_w) } - // Quick return if possible - if n == 0 { - return 0 + if n == 1 { + w[0] = a[0] + work[0] = 2 + if jobz == .ev_compute { + a[0] = 1 + } + return + } + + safmin := dlamch_s + eps := dlamch_p + smlnum := safmin / eps + bignum := 1 / smlnum + rmin := math.sqrt(smlnum) + rmax := math.sqrt(bignum) + + // Scale matrix to allowable range, if necessary. + anrm := dlansy(.max_abs, uplo, n, a, lda, mut work) + mut scaled := false + mut sigma := f64(0) + if anrm > 0 && anrm < rmin { + scaled = true + sigma = rmin / anrm + } else if anrm > rmax { + scaled = true + sigma = rmax / anrm + } + if scaled { + kind := if uplo == .upper { MatrixType.upper_tri } else { MatrixType.lower_tri } + dlascl(kind, 0, 0, 1, sigma, n, n, mut a, lda) } + inde := 0 + indtau := inde + n + indwork := indtau + n + llwork := lwork - indwork + dsytrd(uplo, n, mut a, lda, mut w, mut work[inde..], mut work[indtau..], mut work[indwork..], + llwork) - // Call the relevant LAPACK functions - // (Here we would call the internal implementations like dsytrd, dorgtr, dormtr, etc.) + // For eigenvalues only, call Dsterf. For eigenvectors, first call Dorgtr + // to generate the orthogonal matrix, then call Dsteqr. 
+ if jobz == .ev_none { + if !dsterf(n, mut w, mut work[inde..]) { + panic('Dsterf failed') + } + } else { + dorgtr(uplo, n, mut a, lda, mut work[indtau..], mut work[indwork..], llwork) + if !dsteqr(EvComp(jobz), n, mut w, mut work[inde..], mut a, lda, mut work[indtau..]) { + panic('Dsteqr failed') + } + } - // Placeholder for the actual LAPACK function calls - // Example: info = dsytrd(uplo, n, a, lda, w, work, lwork) - return info + // If the matrix was scaled, then rescale eigenvalues appropriately. + if scaled { + blas.dscal(n, 1 / sigma, mut w, 1) + } + work[0] = f64(lworkopt) } diff --git a/lapack/lapack64/lapack64.v b/lapack/lapack64/lapack64.v new file mode 100644 index 000000000..819fe4fe2 --- /dev/null +++ b/lapack/lapack64/lapack64.v @@ -0,0 +1,50 @@ +module lapack64 + +/// dlamch_e is the machine epsilon. For IEEE this is 2^{-53}. +const dlamch_e = 1.1102230246251565e-16 // 2^-53 + +// dlamch_b is the radix of the machine (the base of the number system). +const dlamch_b = 2.0 + +// dlamch_p is base * eps. +const dlamch_p = dlamch_b * dlamch_e + +// dlamch_s is the "safe minimum", that is, the lowest number such that +// 1/dlamch_s does not overflow, or also the smallest normal number. +// For IEEE this is 2^{-1022}. +const dlamch_s = 2.2250738585072014e-308 // 2^-1022 + +// Blue's scaling constants +// +// An n-vector x is well-scaled if +// dtsml ≤ |xᵢ| ≤ dtbig for 0 ≤ i < n and n ≤ 1/dlamch_p, +// where +// dtsml = 2^ceil((expmin-1)/2) = 2^ceil((-1021-1)/2) = 2^{-511} = 1.4916681462400413e-154 +// dtbig = 2^floor((expmax-digits+1)/2) = 2^floor((1024-53+1)/2) = 2^{486} = 1.997919072202235e+146 +// If any xᵢ is not well-scaled, then multiplying small values by dssml and +// large values by dsbig avoids underflow or overflow when computing the sum +// of squares \sum_0^{n-1} (xᵢ)². 
+// dssml = 2^{-floor((expmin-digits)/2)} = 2^{-floor((-1021-53)/2)} = 2^537 = 4.4989137945431964e+161 +// dsbig = 2^{-ceil((expmax+digits-1)/2)} = 2^{-ceil((1024+53-1)/2)} = 2^{-538} = 1.1113793747425387e-162 +// +// References: +// - Anderson E. (2017) +// Algorithm 978: Safe Scaling in the Level 1 BLAS +// ACM Trans Math Softw 44:1--28 +// https://doi.org/10.1145/3061665 +// - Blue, James L. (1978) +// A Portable Fortran Program to Find the Euclidean Norm of a Vector +// ACM Trans Math Softw 4:15--23 +// https://doi.org/10.1145/355769.355771 + +// dtsml constant +const dtsml = 1.4916681462400413e-154 // 2^-511 + +// dtbig constant +const dtbig = 1.997919072202235e+146 // 2^486 + +// dssml constant +const dssml = 4.4989137945431964e+161 // 2^537 + +// dsbig constant +const dsbig = 1.1113793747425387e-162 // 2^-538 diff --git a/lapack/lapack_notd_vsl_lapack_lapacke.v b/lapack/lapack_notd_vsl_lapack_lapacke.v index 2730f4ef6..7719a8c53 100644 --- a/lapack/lapack_notd_vsl_lapack_lapacke.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -111,8 +111,8 @@ pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { // where U is an upper triangular matrix and L is lower triangular. // // This is the block version of the algorithm, calling Level 3 BLAS. 
-pub fn dpotrf(up bool, n int, mut a []f64, lda int) { - info := lapack64.dpotrf(blas.c_uplo(up), n, mut a, lda) +pub fn dpotrf(uplo bool, n int, mut a []f64, lda int) { + info := lapack64.dpotrf(blas.uplo_from_bool(uplo), n, mut a, lda) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } From d376d36eba8dddab6abd0cb4bae37da27bdffbc3 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sat, 22 Jun 2024 02:56:21 -0300 Subject: [PATCH 25/33] refactor: Update dlansy.v to use named constants for error messages --- lapack/lapack64/dlansy.v | 12 ++--- lapack/lapack64/dlascl.v | 108 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 6 deletions(-) create mode 100644 lapack/lapack64/dlascl.v diff --git a/lapack/lapack64/dlansy.v b/lapack/lapack64/dlansy.v index 04dd3284f..556c5d150 100644 --- a/lapack/lapack64/dlansy.v +++ b/lapack/lapack64/dlansy.v @@ -8,16 +8,16 @@ import vsl.blas // at least n, otherwise work is unused. pub fn dlansy(norm MatrixNorm, uplo blas.Uplo, n int, a []f64, lda int, mut work []f64) f64 { if norm != .max_row_sum && norm != .max_column_sum && norm != .frobenius && norm != .max_abs { - panic(lapack64.bad_norm) + panic(bad_norm) } if uplo != .upper && uplo != .lower { - panic(lapack64.bad_uplo) + panic(bad_uplo) } if n < 0 { panic('lapack: n < 0') } if lda < math.max(1, n) { - panic(lapack64.bad_ld_a) + panic(bad_ld_a) } // Quick return if possible. 
@@ -25,11 +25,11 @@ pub fn dlansy(norm MatrixNorm, uplo blas.Uplo, n int, a []f64, lda int, mut work
 		return 0.0
 	}
 
-	if a.len < (n-1) * lda + n {
-		panic(lapack64.short_a)
+	if a.len < (n - 1) * lda + n {
+		panic(short_a)
 	}
 	if (norm == .max_column_sum || norm == .max_row_sum) && work.len < n {
-		panic(lapack64.short_work)
+		panic(short_work)
 	}
 
 	match norm {
diff --git a/lapack/lapack64/dlascl.v b/lapack/lapack64/dlascl.v
new file mode 100644
index 000000000..35f4c1294
--- /dev/null
+++ b/lapack/lapack64/dlascl.v
@@ -0,0 +1,108 @@
+module lapack64
+
+import math
+
+// dlascl multiplies an m×n matrix by the scalar cto/cfrom.
+//
+// cfrom must not be zero, and cto and cfrom must not be NaN, otherwise dlascl
+// will panic.
+//
+// The scaling is performed without over/underflow as long as the final
+// result cto*A(i,j)/cfrom does not over/underflow (see LAPACK dlascl).
+//
+// dlascl is an internal routine. It is exported for testing purposes.
+pub fn dlascl(kind MatrixType, kl int, ku int, cfrom f64, cto f64, m int, n int, mut a []f64, lda int) {
+	match kind {
+		.general, .upper_tri, .lower_tri {
+			if lda < math.max(1, n) {
+				panic(bad_ld_a)
+			}
+		}
+	}
+	if cfrom == 0.0 {
+		panic(zero_c_from)
+	}
+	if math.is_nan(cfrom) {
+		panic(nan_c_from)
+	}
+	if math.is_nan(cto) {
+		panic(nan_c_to)
+	}
+	if m < 0 {
+		panic(m_lt0)
+	}
+	if n < 0 {
+		panic(n_lt0)
+	}
+
+	if n == 0 || m == 0 {
+		return
+	}
+
+	match kind {
+		.general, .upper_tri, .lower_tri {
+			if a.len < (m - 1) * lda + n {
+				panic(short_a)
+			}
+		}
+	}
+
+	smlnum := dlamch_s
+	bignum := 1.0 / smlnum
+	mut cfromc := cfrom
+	mut ctoc := cto
+	for {
+		// Recompute on every pass: cfromc/ctoc are updated below whenever an
+		// intermediate scaling step is taken (LAPACK dlascl "label 10" loop).
+		// Hoisting this out of the loop would leave cfrom1 stale after
+		// `cfromc = cfrom1`, spuriously triggering the "cfromc is inf" branch
+		// and defeating the overflow/underflow-safe stepping.
+		cfrom1 := cfromc * smlnum
+		mut done := false
+		mut mul := 0.0
+		mut ctol := 0.0
+		if cfrom1 == cfromc {
+			// cfromc is inf.
+			mul = ctoc / cfromc
+			done = true
+			ctol = ctoc
+		} else {
+			ctol = ctoc / bignum
+			if ctol == ctoc {
+				// ctoc is either 0 or inf.
+ mul = ctoc + done = true + cfromc = 1.0 + } else if math.abs(cfrom1) > math.abs(ctoc) && ctoc != 0.0 { + mul = smlnum + done = false + cfromc = cfrom1 + } else if math.abs(ctol) > math.abs(cfromc) { + mul = bignum + done = false + ctoc = ctol + } else { + mul = ctoc / cfromc + done = true + } + } + match kind { + .general { + for i in 0 .. m { + for j in 0 .. n { + a[i * lda + j] *= mul + } + } + } + .upper_tri { + for i in 0 .. m { + for j in i .. n { + a[i * lda + j] *= mul + } + } + } + .lower_tri { + for i in 0 .. m { + for j in 0 .. math.min(i + 1, n) { + a[i * lda + j] *= mul + } + } + } + } + if done { + break + } + } +} From 99a3a2b28886bbe6199cf72db7cbf9e871178034 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sat, 22 Jun 2024 04:43:00 -0300 Subject: [PATCH 26/33] refactor: Update dpotrf function in lapack_notd_vsl_lapack_lapacke.v to use named constant for uplo parameter --- blas/conversions.v | 10 - lapack/lapack64/dlae2.v | 47 ++++ lapack/lapack64/dlanst.v | 68 ++++++ lapack/lapack64/dlapy2.v | 10 + lapack/lapack64/dlasrt.v | 28 +++ lapack/lapack64/dorg2l.v | 76 +++++++ lapack/lapack64/dorgql.v | 140 ++++++++++++ lapack/lapack64/dorgtr.v | 107 +++++++++ lapack/lapack64/dsterf.v | 280 ++++++++++++++++++++++++ lapack/lapack64/dsytrd.v | 185 ++++++++++++++++ lapack/lapack_notd_vsl_lapack_lapacke.v | 4 +- 11 files changed, 943 insertions(+), 12 deletions(-) create mode 100644 lapack/lapack64/dlae2.v create mode 100644 lapack/lapack64/dlanst.v create mode 100644 lapack/lapack64/dlapy2.v create mode 100644 lapack/lapack64/dlasrt.v create mode 100644 lapack/lapack64/dorg2l.v create mode 100644 lapack/lapack64/dorgql.v create mode 100644 lapack/lapack64/dorgtr.v create mode 100644 lapack/lapack64/dsterf.v create mode 100644 lapack/lapack64/dsytrd.v diff --git a/blas/conversions.v b/blas/conversions.v index 18ace678c..b421fa60c 100644 --- a/blas/conversions.v +++ b/blas/conversions.v @@ -21,16 +21,6 @@ pub type Diagonal = blas64.Diagonal // Side is used 
to specify whether a matrix is on the left or right side in a matrix-matrix multiplication. pub type Side = blas64.Side -// uplo_from_bool converts a boolean to Uplo. -pub fn uplo_from_bool(uplo bool) Uplo { - return if uplo { .upper } else { .lower } -} - -// uplo_to_bool converts Uplo to a boolean. -pub fn uplo_to_bool(uplo Uplo) bool { - return uplo == .upper -} - // slice_to_col_major converts nested slice into an array representing a col-major matrix // // _**NOTE**: make sure to have at least 1x1 item_ diff --git a/lapack/lapack64/dlae2.v b/lapack/lapack64/dlae2.v new file mode 100644 index 000000000..59111f8ed --- /dev/null +++ b/lapack/lapack64/dlae2.v @@ -0,0 +1,47 @@ +module lapack64 + +import math + +// dlae2 computes the eigenvalues of a 2×2 symmetric matrix +// +// [a b] +// [b c] +// +// and returns the eigenvalue with the larger absolute value as rt1 and the +// smaller as rt2. +// +// dlae2 is an internal routine. It is exported for testing purposes. +pub fn dlae2(a f64, b f64, c f64) (f64, f64) { + sm := a + c + df := a - c + adf := math.abs(df) + tb := b + b + ab := math.abs(tb) + mut acmx := c + mut acmn := a + if math.abs(a) > math.abs(c) { + acmx = a + acmn = c + } + mut rt := 0.0 + if adf > ab { + rt = adf * math.sqrt(1.0 + (ab / adf) * (ab / adf)) + } else if adf < ab { + rt = ab * math.sqrt(1.0 + (adf / ab) * (adf / ab)) + } else { + rt = ab * math.sqrt(2.0) + } + mut rt1 := 0.0 + mut rt2 := 0.0 + if sm < 0 { + rt1 = 0.5 * (sm - rt) + rt2 = (acmx / rt1) * acmn - (b / rt1) * b + } else if sm > 0 { + rt1 = 0.5 * (sm + rt) + rt2 = (acmx / rt1) * acmn - (b / rt1) * b + } else { + rt1 = 0.5 * rt + rt2 = -0.5 * rt + } + return rt1, rt2 +} diff --git a/lapack/lapack64/dlanst.v b/lapack/lapack64/dlanst.v new file mode 100644 index 000000000..24cb91461 --- /dev/null +++ b/lapack/lapack64/dlanst.v @@ -0,0 +1,68 @@ +module lapack64 + +import math + +// dlanst computes the specified norm of a symmetric tridiagonal matrix A. 
+// The diagonal elements of A are stored in d and the off-diagonal elements +// are stored in e. +pub fn dlanst(norm MatrixNorm, n int, d []f64, e []f64) f64 { + if norm != .max_row_sum && norm != .max_column_sum && norm != .frobenius && norm != .max_abs { + panic(bad_norm) + } + if n < 0 { + panic(n_lt0) + } + if n == 0 { + return 0.0 + } + if d.len < n { + panic(short_d) + } + if e.len < n - 1 { + panic(short_e) + } + + match norm { + .max_abs { + mut anorm := math.abs(d[n - 1]) + for i in 0 .. n - 1 { + mut sum := math.abs(d[i]) + if anorm < sum || math.is_nan(sum) { + anorm = sum + } + sum = math.abs(e[i]) + if anorm < sum || math.is_nan(sum) { + anorm = sum + } + } + return anorm + } + .max_row_sum, .max_column_sum { + if n == 1 { + return math.abs(d[0]) + } + mut anorm := math.abs(d[0]) + math.abs(e[0]) + mut sum := math.abs(e[n - 2]) + math.abs(d[n - 1]) + if anorm < sum || math.is_nan(sum) { + anorm = sum + } + for i in 1 .. n - 1 { + sum = math.abs(d[i]) + math.abs(e[i]) + math.abs(e[i - 1]) + if anorm < sum || math.is_nan(sum) { + anorm = sum + } + } + return anorm + } + .frobenius { + mut scale := 0.0 + mut sum := 1.0 + if n > 1 { + scale, sum = dlassq(n - 1, e, 1, scale, sum) + sum = 2 * sum + } + scale, sum = dlassq(n, d, 1, scale, sum) + return scale * math.sqrt(sum) + } + } +} diff --git a/lapack/lapack64/dlapy2.v b/lapack/lapack64/dlapy2.v new file mode 100644 index 000000000..f5b055d3a --- /dev/null +++ b/lapack/lapack64/dlapy2.v @@ -0,0 +1,10 @@ +module lapack64 + +import math + +// dlapy2 is the LAPACK version of math.hypot. +// +// dlapy2 is an internal routine. It is exported for testing purposes. +pub fn dlapy2(x f64, y f64) f64 { + return math.hypot(x, y) +} diff --git a/lapack/lapack64/dlasrt.v b/lapack/lapack64/dlasrt.v new file mode 100644 index 000000000..39b401b33 --- /dev/null +++ b/lapack/lapack64/dlasrt.v @@ -0,0 +1,28 @@ +module lapack64 + +import math + +// dlasrt sorts the numbers in the input slice d. 
If s == .increasing, +// the elements are sorted in increasing order. If s == .decreasing, +// the elements are sorted in decreasing order. For other values of s dlasrt +// will panic. +// +// dlasrt is an internal routine. It is exported for testing purposes. +pub fn dlasrt(s Sort, n int, mut d []f64) { + if n < 0 { + panic(n_lt0) + } + if d.len < n { + panic(short_d) + } + + d = unsafe { d[..n] } + match s { + .sort_increasing { + d.sort() + } + .sort_decreasing { + d.sort(b < a) + } + } +} diff --git a/lapack/lapack64/dorg2l.v b/lapack/lapack64/dorg2l.v new file mode 100644 index 000000000..2c2070c79 --- /dev/null +++ b/lapack/lapack64/dorg2l.v @@ -0,0 +1,76 @@ +module lapack64 + +import math +import vsl.blas + +// dorg2l generates an m×n matrix Q with orthonormal columns which is defined +// as the last n columns of a product of k elementary reflectors of order m. +// +// Q = H_{k-1} * ... * H_1 * H_0 +// +// See dgelqf for more information. It must be that m >= n >= k. +// +// tau contains the scalar reflectors computed by dgeqlf. tau must have length +// at least k, and dorg2l will panic otherwise. +// +// work contains temporary memory, and must have length at least n. dorg2l will +// panic otherwise. +// +// dorg2l is an internal routine. It is exported for testing purposes. +pub fn dorg2l(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f64) { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if n > m { + panic(n_gtm) + } + if k < 0 { + panic(k_lt0) + } + if k > n { + panic(k_gtn) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + if n == 0 { + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + if tau.len < k { + panic(short_tau) + } + if work.len < n { + panic(short_work) + } + + // Initialize columns 0:n-k to columns of the unit matrix. 
+ for j := 0; j < n - k; j++ { + for l := 0; l < m; l++ { + a[l * lda + j] = 0 + } + a[(m - n + j) * lda + j] = 1 + } + + for i := 0; i < k; i++ { + ii := n - k + i + + // Apply H_i to A[0:m-k+i, 0:n-k+i] from the left. + a[(m - n + ii) * lda + ii] = 1 + dlarf(.left, m - n + ii + 1, ii, mut a[ii..], lda, tau[i], mut a, lda, mut work) + blas.dscal(m - n + ii, -tau[i], mut a[ii..], lda) + a[(m - n + ii) * lda + ii] = 1 - tau[i] + + // Set A[m-k+i:m, n-k+i+1] to zero. + for l := m - n + ii + 1; l < m; l++ { + a[l * lda + ii] = 0 + } + } +} diff --git a/lapack/lapack64/dorgql.v b/lapack/lapack64/dorgql.v new file mode 100644 index 000000000..b16636e57 --- /dev/null +++ b/lapack/lapack64/dorgql.v @@ -0,0 +1,140 @@ +module lapack64 + +import math +import vsl.blas + +// dorgql generates the m×n matrix Q with orthonormal columns defined as the +// last n columns of a product of k elementary reflectors of order m +// +// Q = H_{k-1} * ... * H_1 * H_0. +// +// It must hold that +// +// 0 <= k <= n <= m, +// +// and dorgql will panic otherwise. +// +// On entry, the (n-k+i)-th column of A must contain the vector which defines +// the elementary reflector H_i, for i=0,...,k-1, and tau[i] must contain its +// scalar factor. On return, a contains the m×n matrix Q. +// +// tau must have length at least k, and dorgql will panic otherwise. +// +// work must have length at least max(1,lwork), and lwork must be at least +// max(1,n), otherwise dorgql will panic. For optimum performance lwork must +// be a sufficiently large multiple of n. +// +// If lwork == -1, instead of computing dorgql the optimal work length is stored +// into work[0]. +// +// dorgql is an internal routine. It is exported for testing purposes. 
+pub fn dorgql(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f64, lwork int) { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if n > m { + panic(n_gtm) + } + if k < 0 { + panic(k_lt0) + } + if k > n { + panic(k_gtn) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if lwork < math.max(1, n) && lwork != -1 { + panic(bad_l_work) + } + if work.len < math.max(1, lwork) { + panic(short_work) + } + + // Quick return if possible. + if n == 0 { + work[0] = 1 + return + } + + mut nb := ilaenv(1, 'DORGQL', ' ', m, n, k, -1) + if lwork == -1 { + work[0] = f64(n * nb) + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + if tau.len < k { + panic(short_tau) + } + + mut nbmin := 2 + mut nx := 0 + mut ldwork := 0 + mut iws := n + if 1 < nb && nb < k { + // Determine when to cross over from blocked to unblocked code. + nx = math.max(0, ilaenv(3, 'DORGQL', ' ', m, n, k, -1)) + if nx < k { + // Determine if workspace is large enough for blocked code. + iws = n * nb + if lwork < iws { + // Not enough workspace to use optimal nb: reduce nb and determine + // the minimum value of nb. + nb = lwork / n + nbmin = math.max(2, ilaenv(2, 'DORGQL', ' ', m, n, k, -1)) + } + ldwork = nb + } + } + + mut kk := 0 + if nbmin <= nb && nb < k && nx < k { + // Use blocked code after the first block. The last kk columns are handled + // by the block method. + kk = math.min(k, ((k - nx + nb - 1) / nb) * nb) + + // Set A(m-kk:m, 0:n-kk) to zero. + for i := m - kk; i < m; i++ { + for j := 0; j < n - kk; j++ { + a[i * lda + j] = 0 + } + } + } + + // Use unblocked code for the first or only block. + dorg2l(m - kk, n - kk, k - kk, mut a, lda, tau, mut work) + if kk > 0 { + // Use blocked code. + for i := k - kk; i < k; i += nb { + ib := math.min(nb, k - i) + if n - k + i > 0 { + // Form the triangular factor of the block reflector + // H = H_{i+ib-1} * ... * H_{i+1} * H_i. 
+ dlarft(.backward, .column_wise, m - k + i + ib, ib, mut a[n - k + i..], + lda, tau[i..], mut work, ldwork) + + // Apply H to A[0:m-k+i+ib, 0:n-k+i] from the left. + dlarfb(.left, .no_trans, .backward, .column_wise, m - k + i + ib, n - k + i, + ib, a[n - k + i..], lda, work, ldwork, mut a, lda, mut work[ib * ldwork..], + ldwork) + } + + // Apply H to rows 0:m-k+i+ib of current block. + dorg2l(m - k + i + ib, ib, ib, mut a[n - k + i..], lda, tau[i..], mut work) + + // Set rows m-k+i+ib:m of current block to zero. + for j := n - k + i; j < n - k + i + ib; j++ { + for l := m - k + i + ib; l < m; l++ { + a[l * lda + j] = 0 + } + } + } + } + work[0] = f64(iws) +} diff --git a/lapack/lapack64/dorgtr.v b/lapack/lapack64/dorgtr.v new file mode 100644 index 000000000..bf3829715 --- /dev/null +++ b/lapack/lapack64/dorgtr.v @@ -0,0 +1,107 @@ +module lapack64 + +import math +import vsl.blas + +// dorgtr generates a real orthogonal matrix Q which is defined as the product +// of n-1 elementary reflectors of order n as returned by dsytrd. +// +// The construction of Q depends on the value of uplo: +// +// Q = H_{n-1} * ... * H_1 * H_0 if uplo == blas.Upper +// Q = H_0 * H_1 * ... * H_{n-1} if uplo == blas.Lower +// +// where H_i is constructed from the elementary reflectors as computed by dsytrd. +// See the documentation for dsytrd for more information. +// +// tau must have length at least n-1, and dorgtr will panic otherwise. +// +// work is temporary storage, and lwork specifies the usable memory length. At +// minimum, lwork >= max(1,n-1), and dorgtr will panic otherwise. The amount of blocking +// is limited by the usable length. +// If lwork == -1, instead of computing dorgtr the optimal work length is stored +// into work[0]. +// +// dorgtr is an internal routine. It is exported for testing purposes. 
+pub fn dorgtr(uplo blas.Uplo, n int, mut a []f64, lda int, tau []f64, mut work []f64, lwork int) { + if uplo != .upper && uplo != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if lwork < math.max(1, n - 1) && lwork != -1 { + panic(bad_l_work) + } + if work.len < math.max(1, lwork) { + panic(short_work) + } + + if n == 0 { + work[0] = 1 + return + } + + mut nb := 0 + if uplo == .upper { + nb = ilaenv(1, 'DORGQL', ' ', n - 1, n - 1, n - 1, -1) + } else { + nb = ilaenv(1, 'DORGQR', ' ', n - 1, n - 1, n - 1, -1) + } + lworkopt := math.max(1, n - 1) * nb + if lwork == -1 { + work[0] = f64(lworkopt) + return + } + + if a.len < (n - 1) * lda + n { + panic(short_a) + } + if tau.len < n - 1 { + panic(short_tau) + } + + if uplo == .upper { + // Q was determined by a call to dsytrd with uplo == blas.Upper. + // Shift the vectors which define the elementary reflectors one column + // to the left, and set the last row and column of Q to those of the unit + // matrix. + for j := 0; j < n - 1; j++ { + for i := 0; i < j; i++ { + a[i * lda + j] = unsafe { a[i * lda + j + 1] } + } + a[(n - 1) * lda + j] = 0 + } + for i := 0; i < n - 1; i++ { + a[i * lda + n - 1] = 0 + } + a[(n - 1) * lda + n - 1] = 1 + + // Generate Q[0:n-1, 0:n-1]. + dorgql(n - 1, n - 1, n - 1, mut a, lda, tau, mut work, lwork) + } else { + // Q was determined by a call to dsytrd with uplo == blas.Lower. + // Shift the vectors which define the elementary reflectors one column + // to the right, and set the first row and column of Q to those of the unit + // matrix. + for j := n - 1; j > 0; j-- { + a[j] = 0 + for i := j + 1; i < n; i++ { + a[i * lda + j] = unsafe { a[i * lda + j - 1] } + } + } + a[0] = 1 + for i := 1; i < n; i++ { + a[i * lda] = 0 + } + if n > 1 { + mut a_sub := unsafe { a[lda + 1..] } + // Generate Q[1:n, 1:n]. 
+ dorgqr(n - 1, n - 1, n - 1, mut a_sub, lda, tau[..n - 1], mut work, lwork) + } + } + work[0] = f64(lworkopt) +} diff --git a/lapack/lapack64/dsterf.v b/lapack/lapack64/dsterf.v new file mode 100644 index 000000000..c0db958aa --- /dev/null +++ b/lapack/lapack64/dsterf.v @@ -0,0 +1,280 @@ +module lapack64 + +import math + +// dsterf computes all eigenvalues of a symmetric tridiagonal matrix using the +// Pal-Walker-Kahan variant of the QL or QR algorithm. +// +// d contains the diagonal elements of the tridiagonal matrix on entry, and +// contains the eigenvalues in ascending order on exit. d must have length at +// least n, or dsterf will panic. +// +// e contains the off-diagonal elements of the tridiagonal matrix on entry, and is +// overwritten during the call to dsterf. e must have length of at least n-1 or +// dsterf will panic. +// +// dsterf is an internal routine. It is exported for testing purposes. +pub fn dsterf(n int, mut d []f64, mut e []f64) bool { + if n < 0 { + panic(n_lt0) + } + + // Quick return if possible. + if n == 0 { + return true + } + + if d.len < n { + panic(short_d) + } + if e.len < n - 1 { + panic(short_e) + } + + if n == 1 { + return true + } + + none_scaled := 0 // The values are not scaled. + down := 1 // The values are scaled below ssfmax threshold. + up := 2 // The values are scaled below ssfmin threshold. + + // Determine the unit roundoff for this environment. + eps := dlamch_e + eps2 := eps * eps + safmin := dlamch_s + safmax := 1.0 / safmin + ssfmax := math.sqrt(safmax) / 3.0 + ssfmin := math.sqrt(safmin) / eps2 + + // Compute the eigenvalues of the tridiagonal matrix. 
+ maxit := 30 + nmaxit := n * maxit + mut jtot := 0 + + mut l1 := 0 + + for { + if l1 > n - 1 { + dlasrt(.sort_increasing, n, mut d) + return true + } + if l1 > 0 { + e[l1 - 1] = 0 + } + mut m := 0 + for m = l1; m < n - 1; m++ { + if math.abs(e[m]) <= math.sqrt(math.abs(d[m])) * math.sqrt(math.abs(d[m + 1])) * eps { + e[m] = 0 + break + } + } + + mut l := l1 + lsv := l + mut lend := m + lendsv := lend + l1 = m + 1 + if lend == 0 { + continue + } + + // Scale submatrix in rows and columns l to lend. + anorm := dlanst(.max_abs, lend - l + 1, d[l..], e[l..]) + mut iscale := none_scaled + if anorm == 0.0 { + continue + } + if anorm > ssfmax { + iscale = down + dlascl(.general, 0, 0, anorm, ssfmax, lend - l + 1, 1, mut d[l..], n) + dlascl(.general, 0, 0, anorm, ssfmax, lend - l, 1, mut e[l..], n) + } else if anorm < ssfmin { + iscale = up + dlascl(.general, 0, 0, anorm, ssfmin, lend - l + 1, 1, mut d[l..], n) + dlascl(.general, 0, 0, anorm, ssfmin, lend - l, 1, mut e[l..], n) + } + + mut el := unsafe { e[l..lend] } + for i, v in el { + el[i] *= v + } + + // Choose between QL and QR iteration. + if math.abs(d[lend]) < math.abs(d[l]) { + lend = lsv + l = lendsv + } + if lend >= l { + // QL Iteration. + // Look for small sub-diagonal element. + for { + if l != lend { + for m = l; m < lend; m++ { + if math.abs(e[m]) <= eps2 * (math.abs(d[m] * d[m + 1])) { + break + } + } + } else { + m = lend + } + if m < lend { + e[m] = 0 + } + mut p := d[l] + if m == l { + // Eigenvalue found. + l++ + if l > lend { + break + } + continue + } + // If remaining matrix is 2 by 2, use Dlae2 to compute its eigenvalues. + if m == l + 1 { + d[l], d[l + 1] = dlae2(d[l], math.sqrt(e[l]), d[l + 1]) + e[l] = 0 + l += 2 + if l > lend { + break + } + continue + } + if jtot == nmaxit { + break + } + jtot++ + + // Form shift. 
+ rte := math.sqrt(e[l]) + mut sigma := (d[l + 1] - p) / (2.0 * rte) + r := dlapy2(sigma, 1.0) + sigma = p - (rte / (sigma + math.copysign(r, sigma))) + + mut c := 1.0 + mut s := 0.0 + mut gamma := d[m] - sigma + p = gamma * gamma + + // Inner loop. + for i := m - 1; i >= l; i-- { + bb := e[i] + r_ := p + bb + if i != m - 1 { + e[i + 1] = s * r_ + } + oldc := c + c = p / r_ + s = bb / r_ + oldgam := gamma + alpha := d[i] + gamma = c * (alpha - sigma) - s * oldgam + d[i + 1] = oldgam + (alpha - gamma) + if c != 0.0 { + p = (gamma * gamma) / c + } else { + p = oldc * bb + } + } + e[l] = s * p + d[l] = sigma + gamma + } + } else { + for { + // QR Iteration. + // Look for small super-diagonal element. + for m = l; m > lend; m-- { + if math.abs(e[m - 1]) <= eps2 * math.abs(d[m] * d[m - 1]) { + break + } + } + if m > lend { + e[m - 1] = 0 + } + mut p := d[l] + if m == l { + // Eigenvalue found. + l-- + if l < lend { + break + } + continue + } + + // If remaining matrix is 2 by 2, use Dlae2 to compute its eigenvalues. + if m == l - 1 { + d[l], d[l - 1] = dlae2(d[l], math.sqrt(e[l - 1]), d[l - 1]) + e[l - 1] = 0 + l -= 2 + if l < lend { + break + } + continue + } + if jtot == nmaxit { + break + } + jtot++ + + // Form shift. + rte := math.sqrt(e[l - 1]) + mut sigma := (d[l - 1] - p) / (2.0 * rte) + r := dlapy2(sigma, 1.0) + sigma = p - (rte / (sigma + math.copysign(r, sigma))) + + mut c := 1.0 + mut s := 0.0 + mut gamma := d[m] - sigma + p = gamma * gamma + + // Inner loop. 
+ for i := m; i < l; i++ { + bb := e[i] + r_ := p + bb + if i != m { + e[i - 1] = s * r_ + } + oldc := c + c = p / r_ + s = bb / r_ + oldgam := gamma + alpha := d[i + 1] + gamma = c * (alpha - sigma) - s * oldgam + d[i] = oldgam + alpha - gamma + if c != 0.0 { + p = (gamma * gamma) / c + } else { + p = oldc * bb + } + } + e[l - 1] = s * p + d[l] = sigma + gamma + } + } + + // Undo scaling if necessary + match iscale { + down { + dlascl(.general, 0, 0, ssfmax, anorm, lendsv - lsv + 1, 1, mut d[lsv..], + n) + } + up { + dlascl(.general, 0, 0, ssfmin, anorm, lendsv - lsv + 1, 1, mut d[lsv..], + n) + } + else {} + } + + // Check for no convergence to an eigenvalue after a total of n*maxit iterations. + if jtot >= nmaxit { + break + } + } + for v in e[0..n - 1] { + if v != 0.0 { + return false + } + } + dlasrt(.sort_increasing, n, mut d) + return true +} diff --git a/lapack/lapack64/dsytrd.v b/lapack/lapack64/dsytrd.v new file mode 100644 index 000000000..e68f16259 --- /dev/null +++ b/lapack/lapack64/dsytrd.v @@ -0,0 +1,185 @@ +module lapack64 + +import math +import vsl.blas + +// dsytrd reduces a symmetric n×n matrix A to symmetric tridiagonal form by an +// orthogonal similarity transformation +// +// Qᵀ * A * Q = T +// +// where Q is an orthonormal matrix and T is symmetric and tridiagonal. +// +// On entry, a contains the elements of the input matrix in the triangle specified +// by uplo. On exit, the diagonal and sub/super-diagonal are overwritten by the +// corresponding elements of the tridiagonal matrix T. The remaining elements in +// the triangle, along with the array tau, contain the data to construct Q as +// the product of elementary reflectors. +// +// If uplo == blas.upper, Q is constructed with +// +// Q = H_{n-2} * ... * H_1 * H_0 +// +// where +// +// H_i = I - tau_i * v * vᵀ +// +// v is constructed as v[i+1:n] = 0, v[i] = 1, v[0:i-1] is stored in A[0:i-1, i+1]. 
+// The elements of A are +// +// [ d e v1 v2 v3] +// [ d e v2 v3] +// [ d e v3] +// [ d e] +// [ e] +// +// If uplo == blas.lower, Q is constructed with +// +// Q = H_0 * H_1 * ... * H_{n-2} +// +// where +// +// H_i = I - tau_i * v * vᵀ +// +// v is constructed as v[0:i+1] = 0, v[i+1] = 1, v[i+2:n] is stored in A[i+2:n, i]. +// The elements of A are +// +// [ d ] +// [ e d ] +// [v0 e d ] +// [v0 v1 e d ] +// [v0 v1 v2 e d] +// +// d must have length n, and e and tau must have length n-1. dsytrd will panic if +// these conditions are not met. +// +// work is temporary storage, and lwork specifies the usable memory length. At minimum, +// lwork >= 1, and dsytrd will panic otherwise. The amount of blocking is +// limited by the usable length. +// If lwork == -1, instead of computing dsytrd the optimal work length is stored +// into work[0]. +// +// dsytrd is an internal routine. It is exported for testing purposes. +pub fn dsytrd(uplo blas.Uplo, n int, mut a []f64, lda int, mut d []f64, mut e []f64, mut tau []f64, mut work []f64, lwork int) { + if uplo != .upper && uplo != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if lwork < 1 && lwork != -1 { + panic(bad_l_work) + } + if work.len < math.max(1, lwork) { + panic(short_work) + } + + // Quick return if possible. + if n == 0 { + work[0] = 1 + return + } + + mut nb := ilaenv(1, 'DSYTRD', if uplo == .upper { 'U' } else { 'L' }, n, -1, -1, -1) + lworkopt := n * nb + if lwork == -1 { + work[0] = f64(lworkopt) + return + } + + if a.len < (n - 1) * lda + n { + panic(short_a) + } + if d.len < n { + panic(short_d) + } + if e.len < n - 1 { + panic(short_e) + } + if tau.len < n - 1 { + panic(short_tau) + } + + mut nx := n + mut iws := 1 + mut ldwork := 0 + if 1 < nb && nb < n { + // Determine when to cross over from blocked to unblocked code. The last + // block is always handled by unblocked code. 
+		nx = math.max(nb, ilaenv(3, 'DSYTRD', if uplo == .upper { 'U' } else { 'L' },
+			n, -1, -1, -1))
+		if nx < n {
+			// Determine if workspace is large enough for blocked code.
+			ldwork = nb
+			iws = n * ldwork
+			if lwork < iws {
+				// Not enough workspace to use optimal nb: determine the minimum
+				// value of nb and reduce nb or force use of unblocked code by
+				// setting nx = n.
+				nb = math.max(lwork / n, 1)
+				nbmin := ilaenv(2, 'DSYTRD', if uplo == .upper { 'U' } else { 'L' }, n,
+					-1, -1, -1)
+				if nb < nbmin {
+					nx = n
+				}
+			}
+		} else {
+			nx = n
+		}
+	} else {
+		nb = 1
+	}
+	ldwork = nb
+
+	if uplo == .upper {
+		// Reduce the upper triangle of A. Columns 0:kk are handled by the
+		// unblocked method.
+		mut i := 0
+		kk := n - ((n - nx + nb - 1) / nb) * nb
+		for i = n - nb; i >= kk; i -= nb {
+			// Reduce columns i:i+nb to tridiagonal form and form the matrix W
+			// which is needed to update the unreduced part of the matrix.
+			dlatrd(uplo, i + nb, nb, mut a, lda, mut e, mut tau, mut work, ldwork)
+
+			// Update the unreduced submatrix A[0:i-1,0:i-1], using an update
+			// of the form A = A - V*Wᵀ - W*Vᵀ.
+			// NOTE(fix): in this row-major upper-triangular storage the
+			// reflector block V occupies rows 0:i of columns i:i+nb, so it
+			// starts at a[i] with stride lda — not at row i (a[i*lda]).
+			// Cf. reference implementations: Dsyr2k(..., a[i:], lda, ...).
+			blas.dsyr2k(uplo, .no_trans, i, nb, -1.0, a[i..], lda, work, ldwork,
+				1.0, mut a, lda)
+
+			// Copy superdiagonal elements back into A, and diagonal elements into D.
+			for j := i; j < i + nb; j++ {
+				a[(j - 1) * lda + j] = e[j - 1]
+				d[j] = a[j * lda + j]
+			}
+		}
+		// Use unblocked code to reduce the last or only block
+		dsytd2(uplo, kk, mut a, lda, mut d, mut e, mut tau)
+	} else {
+		mut i := 0
+		// Reduce the lower triangle of A.
+		for i = 0; i < n - nx; i += nb {
+			// Reduce columns 0:i+nb to tridiagonal form and form the matrix W
+			// which is needed to update the unreduced part of the matrix.
+			dlatrd(uplo, n - i, nb, mut a[i * lda + i..], lda, mut e[i..], mut tau[i..], mut
+				work, ldwork)
+
+			// Update the unreduced submatrix A[i+ib:n, i+ib:n], using an update
+			// of the form A = A + V*Wᵀ - W*Vᵀ.
+ blas.dsyr2k(uplo, .no_trans, n - i - nb, nb, -1.0, a[(i + nb) * lda + i..], + lda, work[nb * ldwork..], ldwork, 1.0, mut a[(i + nb) * lda + i + nb..], + lda) + + // Copy subdiagonal elements back into A, and diagonal elements into D. + for j := i; j < i + nb; j++ { + a[(j + 1) * lda + j] = e[j] + d[j] = a[j * lda + j] + } + } + // Use unblocked code to reduce the last or only block. + dsytd2(uplo, n - i, mut a[i * lda + i..], lda, mut d[i..], mut e[i..], mut tau[i..]) + } + work[0] = f64(iws) +} diff --git a/lapack/lapack_notd_vsl_lapack_lapacke.v b/lapack/lapack_notd_vsl_lapack_lapacke.v index 7719a8c53..e16020046 100644 --- a/lapack/lapack_notd_vsl_lapack_lapacke.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -111,8 +111,8 @@ pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { // where U is an upper triangular matrix and L is lower triangular. // // This is the block version of the algorithm, calling Level 3 BLAS. -pub fn dpotrf(uplo bool, n int, mut a []f64, lda int) { - info := lapack64.dpotrf(blas.uplo_from_bool(uplo), n, mut a, lda) +pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { + info := lapack64.dpotrf(uplo, n, mut a, lda) if info != 0 { errors.vsl_panic('lapack failed', .efailed) } From 5f9a1fe0e5c16c0b2602805a7312691e2de105a6 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 03:59:36 -0300 Subject: [PATCH 27/33] refactor: Update conversions.v, dgetf2.v, dsyev.v, and lapack_notd_vsl_lapack_lapacke.v --- blas/blas64/conversions.v | 1 + lapack/lapack64/dlaev2.v | 73 ++++++++ lapack/lapack64/dlarf.v | 92 ++++++++++ lapack/lapack64/dlarfb.v | 259 ++++++++++++++++++++++++++ lapack/lapack64/dlarfg.v | 51 ++++++ lapack/lapack64/dlarft.v | 155 ++++++++++++++++ lapack/lapack64/dlartg.v | 56 ++++++ lapack/lapack64/dlaset.v | 59 ++++++ lapack/lapack64/dlasr.v | 277 ++++++++++++++++++++++++++++ lapack/lapack64/dlatrd.v | 108 +++++++++++ lapack/lapack64/dorg2l.v | 2 +- lapack/lapack64/dorg2r.v | 66 +++++++ 
lapack/lapack64/dorgql.v | 4 +- lapack/lapack64/dorgqr.v | 104 +++++++++++ lapack/lapack64/dsteqr.v | 375 ++++++++++++++++++++++++++++++++++++++ lapack/lapack64/dsyev.v | 4 +- lapack/lapack64/dsytd2.v | 144 +++++++++++++++ lapack/lapack64/errors.v | 6 +- lapack/lapack64/iladlc.v | 42 +++++ lapack/lapack64/iladlr.v | 38 ++++ 20 files changed, 1908 insertions(+), 8 deletions(-) create mode 100644 lapack/lapack64/dlaev2.v create mode 100644 lapack/lapack64/dlarf.v create mode 100644 lapack/lapack64/dlarfb.v create mode 100644 lapack/lapack64/dlarfg.v create mode 100644 lapack/lapack64/dlarft.v create mode 100644 lapack/lapack64/dlartg.v create mode 100644 lapack/lapack64/dlaset.v create mode 100644 lapack/lapack64/dlasr.v create mode 100644 lapack/lapack64/dlatrd.v create mode 100644 lapack/lapack64/dorg2r.v create mode 100644 lapack/lapack64/dorgqr.v create mode 100644 lapack/lapack64/dsteqr.v create mode 100644 lapack/lapack64/dsytd2.v create mode 100644 lapack/lapack64/iladlc.v create mode 100644 lapack/lapack64/iladlr.v diff --git a/blas/blas64/conversions.v b/blas/blas64/conversions.v index 6688d5887..cf0b75a6e 100644 --- a/blas/blas64/conversions.v +++ b/blas/blas64/conversions.v @@ -18,6 +18,7 @@ pub enum Transpose { pub enum Uplo { upper = 121 lower = 122 + all = 99 } // Diagonal is used to specify whether the diagonal of a matrix is unit or non-unit. 
diff --git a/lapack/lapack64/dlaev2.v b/lapack/lapack64/dlaev2.v new file mode 100644 index 000000000..cf6fcc011 --- /dev/null +++ b/lapack/lapack64/dlaev2.v @@ -0,0 +1,73 @@ +module lapack64 + +import math + +pub fn dlaev2(a f64, b f64, c f64) (f64, f64, f64, f64) { + sm := a + c + df := a - c + adf := math.abs(df) + tb := b + b + ab := math.abs(tb) + mut acmx := c + mut acmn := a + if math.abs(a) > math.abs(c) { + acmx = a + acmn = c + } + mut rt := 0.0 + if adf > ab { + rt = adf * math.sqrt(1 + (ab / adf) * (ab / adf)) + } else if adf < ab { + rt = ab * math.sqrt(1 + (adf / ab) * (adf / ab)) + } else { + rt = ab * math.sqrt(2) + } + mut rt1 := 0.0 + mut rt2 := 0.0 + mut cs1 := 0.0 + mut sn1 := 0.0 + mut sgn1 := 0.0 + if sm < 0 { + rt1 = 0.5 * (sm - rt) + sgn1 = -1 + rt2 = (acmx / rt1) * acmn - (b / rt1) * b + } else if sm > 0 { + rt1 = 0.5 * (sm + rt) + sgn1 = 1 + rt2 = (acmx / rt1) * acmn - (b / rt1) * b + } else { + rt1 = 0.5 * rt + rt2 = -0.5 * rt + sgn1 = 1 + } + mut cs := 0.0 + mut sgn2 := 0.0 + if df >= 0 { + cs = df + rt + sgn2 = 1 + } else { + cs = df - rt + sgn2 = -1 + } + acs := math.abs(cs) + if acs > ab { + ct := -tb / cs + sn1 = 1 / math.sqrt(1 + ct * ct) + cs1 = ct * sn1 + } else { + if ab == 0 { + cs1 = 1 + sn1 = 0 + } else { + tn := -cs / tb + cs1 = 1 / math.sqrt(1 + tn * tn) + sn1 = tn * cs1 + } + } + if sgn1 == sgn2 { + tn := cs1 + cs1 = -sn1 + sn1 = tn + } + return rt1, rt2, cs1, sn1 +} diff --git a/lapack/lapack64/dlarf.v b/lapack/lapack64/dlarf.v new file mode 100644 index 000000000..217279980 --- /dev/null +++ b/lapack/lapack64/dlarf.v @@ -0,0 +1,92 @@ +module lapack64 + +import math +import vsl.blas + +// dlarf applies an elementary reflector H to an m×n matrix C: +// +// C = H * C if side == .left +// C = C * H if side == .right +// +// H is represented in the form +// +// H = I - tau * v * vᵀ +// +// where tau is a scalar and v is a vector. +// +// work must have length at least m if side == .left and +// at least n if side == .right. 
+// +// dlarf is an internal routine. It is exported for testing purposes. +pub fn dlarf(side blas.Side, m int, n int, v []f64, incv int, tau f64, mut c []f64, ldc int, mut work []f64) { + if side != .left && side != .right { + panic(bad_side) + } + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if incv == 0 { + panic(zero_inc_v) + } + if ldc < math.max(1, n) { + panic(bad_ld_c) + } + + if m == 0 || n == 0 { + return + } + + applyleft := side == .left + len_v := if applyleft { m } else { n } + + if v.len < 1 + (len_v - 1) * math.abs(incv) { + panic(short_v) + } + if c.len < (m - 1) * ldc + n { + panic(short_c) + } + if (applyleft && work.len < n) || (!applyleft && work.len < m) { + panic(short_work) + } + + mut lastv := -1 // last non-zero element of v + mut lastc := -1 // last non-zero row/column of C + if tau != 0 { + lastv = if applyleft { m - 1 } else { n - 1 } + mut i := if incv > 0 { lastv * incv } else { 0 } + // Look for the last non-zero row in v. + for lastv >= 0 && v[i] == 0 { + lastv-- + i -= incv + } + if applyleft { + // Scan for the last non-zero column in C[0:lastv, :] + lastc = iladlc(lastv + 1, n, c, ldc) + } else { + // Scan for the last non-zero row in C[:, 0:lastv] + lastc = iladlr(m, lastv + 1, c, ldc) + } + } + if lastv == -1 || lastc == -1 { + return + } + + if applyleft { + // Form H * C + // w[0:lastc+1] = c[1:lastv+1, 1:lastc+1]ᵀ * v[1:lastv+1,1] + blas.dgemv(.trans, lastv + 1, lastc + 1, 1.0, c, ldc, v, incv, 0.0, mut work, + 1) + // c[0: lastv, 0: lastc] = c[...] - w[0:lastv, 1] * v[1:lastc, 1]ᵀ + blas.dger(lastv + 1, lastc + 1, -tau, v, incv, work, 1, mut c, ldc) + } else { + // Form C * H + // w[0:lastc+1,1] := c[0:lastc+1,0:lastv+1] * v[0:lastv+1,1] + blas.dgemv(.no_trans, lastc + 1, lastv + 1, 1.0, c, ldc, v, incv, 0.0, mut work, + 1) + // c[0:lastc+1,0:lastv+1] = c[...] 
- w[0:lastc+1,0] * v[0:lastv+1,0]ᵀ + blas.dger(lastc + 1, lastv + 1, -tau, work, 1, v, incv, mut c, ldc) + } +} diff --git a/lapack/lapack64/dlarfb.v b/lapack/lapack64/dlarfb.v new file mode 100644 index 000000000..932acf752 --- /dev/null +++ b/lapack/lapack64/dlarfb.v @@ -0,0 +1,259 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dlarfb(side blas.Side, trans blas.Transpose, direct Direct, store StoreV, m int, n int, k int, v []f64, ldv int, t []f64, ldt int, mut c []f64, ldc int, mut work []f64, ldwork int) { + if side != .left && side != .right { + panic(bad_side) + } + if trans != .trans && trans != .no_trans { + panic(bad_trans) + } + if direct != .forward && direct != .backward { + panic(bad_direct) + } + if store != .column_wise && store != .row_wise { + panic(bad_store_v) + } + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if k < 0 { + panic(k_lt0) + } + if store == .column_wise && ldv < math.max(1, k) { + panic(bad_ld_v) + } + if store == .row_wise && ldv < math.max(1, m) { + panic(bad_ld_v) + } + if ldt < math.max(1, k) { + panic(bad_ld_t) + } + if ldc < math.max(1, n) { + panic(bad_ld_c) + } + if ldwork < math.max(1, k) { + panic(bad_ld_work) + } + + if m == 0 || n == 0 { + return + } + + mut nv := m + if side == .right { + nv = n + } + if store == .column_wise && v.len < (nv - 1) * ldv + k { + panic(short_v) + } + if store == .row_wise && v.len < (k - 1) * ldv + nv { + panic(short_v) + } + if t.len < (k - 1) * ldt + k { + panic(short_t) + } + if c.len < (m - 1) * ldc + n { + panic(short_c) + } + if work.len < (nv - 1) * ldwork + k { + panic(short_work) + } + + transt := if trans == .trans { blas.Transpose.no_trans } else { blas.Transpose.trans } + + if store == .column_wise { + if direct == .forward { + if side == .left { + for j := 0; j < k; j++ { + blas.dcopy(n, c[j * ldc..], 1, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .lower, .no_trans, .unit, n, k, 1.0, v, ldv, mut work, + ldwork) + if m > k { + 
blas.dgemm(.trans, .no_trans, n, k, m - k, 1.0, c[k * ldc..], ldc, + v[k * ldv..], ldv, 1.0, mut work, ldwork) + } + blas.dtrmm(.right, .upper, transt, .non_unit, n, k, 1.0, t, ldt, mut work, + ldwork) + if m > k { + blas.dgemm(.no_trans, .trans, m - k, n, k, -1.0, v[k * ldv..], ldv, + work, ldwork, 1.0, mut c[k * ldc..], ldc) + } + blas.dtrmm(.right, .lower, .trans, .unit, n, k, 1.0, v, ldv, mut work, + ldwork) + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[j * ldc + i] -= unsafe { work[i * ldwork + j] } + } + } + return + } + for j := 0; j < k; j++ { + blas.dcopy(m, c[j..], ldc, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .lower, .no_trans, .unit, m, k, 1.0, v, ldv, mut work, + ldwork) + if n > k { + blas.dgemm(.no_trans, .no_trans, m, k, n - k, 1.0, c[k..], ldc, v[k * ldv..], + ldv, 1.0, mut work, ldwork) + } + blas.dtrmm(.right, .upper, trans, .non_unit, m, k, 1.0, t, ldt, mut work, + ldwork) + if n > k { + blas.dgemm(.no_trans, .no_trans, m, n - k, k, -1.0, work, ldwork, v[k * ldv..], + ldv, 1.0, mut c[k..], ldc) + } + blas.dtrmm(.right, .lower, .trans, .unit, m, k, 1.0, v, ldv, mut work, ldwork) + for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i * ldc + j] -= unsafe { work[i * ldwork + j] } + } + } + return + } + if side == .left { + for j := 0; j < k; j++ { + blas.dcopy(n, c[(m - k + j) * ldc..], 1, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .upper, .no_trans, .unit, n, k, 1.0, v[(m - k) * ldv..], + ldv, mut work, ldwork) + if m > k { + blas.dgemm(.trans, .no_trans, n, k, m - k, 1.0, c, ldc, v, ldv, 1.0, mut + work, ldwork) + } + blas.dtrmm(.right, .lower, transt, .non_unit, n, k, 1.0, t, ldt, mut work, + ldwork) + if m > k { + blas.dgemm(.no_trans, .trans, m - k, n, k, -1.0, v, ldv, work, ldwork, + 1.0, mut c, ldc) + } + blas.dtrmm(.right, .upper, .trans, .unit, n, k, 1.0, v[(m - k) * ldv..], ldv, mut + work, ldwork) + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[(m - k + j) * ldc + i] -= unsafe { work[i * 
ldwork + j] } + } + } + return + } + for j := 0; j < k; j++ { + blas.dcopy(m, c[(n - k + j)..], ldc, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .upper, .no_trans, .unit, m, k, 1.0, v[(n - k) * ldv..], ldv, mut + work, ldwork) + if n > k { + blas.dgemm(.no_trans, .no_trans, m, k, n - k, 1.0, c, ldc, v, ldv, 1.0, mut + work, ldwork) + } + blas.dtrmm(.right, .lower, trans, .non_unit, m, k, 1.0, t, ldt, mut work, ldwork) + if n > k { + blas.dgemm(.no_trans, .trans, m, n - k, k, -1.0, work, ldwork, v, ldv, 1.0, mut + c, ldc) + } + blas.dtrmm(.right, .upper, .trans, .unit, m, k, 1.0, v[(n - k) * ldv..], ldv, mut + work, ldwork) + for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i * ldc + (n - k + j)] -= unsafe { work[i * ldwork + j] } + } + } + return + } + if direct == .forward { + if side == .left { + for j := 0; j < k; j++ { + blas.dcopy(n, c[j * ldc..], 1, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .upper, .trans, .unit, n, k, 1.0, v, ldv, mut work, ldwork) + if m > k { + blas.dgemm(.trans, .trans, n, k, m - k, 1.0, c[k * ldc..], ldc, v[k..], + ldv, 1.0, mut work, ldwork) + } + blas.dtrmm(.right, .upper, transt, .non_unit, n, k, 1.0, t, ldt, mut work, + ldwork) + if m > k { + blas.dgemm(.trans, .trans, m - k, n, k, -1.0, v[k..], ldv, work, ldwork, + 1.0, mut c[k * ldc..], ldc) + } + blas.dtrmm(.right, .upper, .no_trans, .unit, n, k, 1.0, v, ldv, mut work, + ldwork) + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[j * ldc + i] -= unsafe { work[i * ldwork + j] } + } + } + return + } + for j := 0; j < k; j++ { + blas.dcopy(m, c[j..], ldc, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .upper, .trans, .unit, m, k, 1.0, v, ldv, mut work, ldwork) + if n > k { + blas.dgemm(.no_trans, .trans, m, k, n - k, 1.0, c[k..], ldc, v[k..], ldv, + 1.0, mut work, ldwork) + } + blas.dtrmm(.right, .upper, trans, .non_unit, m, k, 1.0, t, ldt, mut work, ldwork) + if n > k { + blas.dgemm(.no_trans, .trans, m, n - k, k, -1.0, work, ldwork, v[k..], ldv, 
+ 1.0, mut c[k..], ldc) + } + blas.dtrmm(.right, .upper, .no_trans, .unit, m, k, 1.0, v, ldv, mut work, ldwork) + for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i * ldc + j] -= unsafe { work[i * ldwork + j] } + } + } + return + } + if side == .left { + for j := 0; j < k; j++ { + blas.dcopy(n, c[(m - k + j) * ldc..], 1, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .lower, .no_trans, .unit, n, k, 1.0, v[(m - k)..], ldv, mut + work, ldwork) + if m > k { + blas.dgemm(.trans, .no_trans, n, k, m - k, 1.0, c, ldc, v, ldv, 1.0, mut work, + ldwork) + } + blas.dtrmm(.right, .lower, transt, .non_unit, n, k, 1.0, t, ldt, mut work, ldwork) + if m > k { + blas.dgemm(.no_trans, .trans, m - k, n, k, -1.0, v, ldv, work, ldwork, 1.0, mut + c, ldc) + } + blas.dtrmm(.right, .lower, .trans, .unit, n, k, 1.0, v[(m - k)..], ldv, mut work, + ldwork) + for i := 0; i < n; i++ { + for j := 0; j < k; j++ { + c[(m - k + j) * ldc + i] -= unsafe { work[i * ldwork + j] } + } + } + return + } + for j := 0; j < k; j++ { + blas.dcopy(m, c[(n - k + j)..], ldc, mut work[j * ldwork..], 1) + } + blas.dtrmm(.right, .lower, .no_trans, .unit, m, k, 1.0, v[(n - k)..], ldv, mut work, + ldwork) + if n > k { + blas.dgemm(.no_trans, .trans, m, k, n - k, 1.0, c, ldc, v, ldv, 1.0, mut work, + ldwork) + } + blas.dtrmm(.right, .lower, trans, .non_unit, m, k, 1.0, t, ldt, mut work, ldwork) + if n > k { + blas.dgemm(.no_trans, .no_trans, m, n - k, k, -1.0, work, ldwork, v, ldv, 1.0, mut + c, ldc) + } + blas.dtrmm(.right, .lower, .trans, .unit, m, k, 1.0, v[(n - k)..], ldv, mut work, + ldwork) + for i := 0; i < m; i++ { + for j := 0; j < k; j++ { + c[i * ldc + (n - k + j)] -= unsafe { work[i * ldwork + j] } + } + } +} diff --git a/lapack/lapack64/dlarfg.v b/lapack/lapack64/dlarfg.v new file mode 100644 index 000000000..d870c3691 --- /dev/null +++ b/lapack/lapack64/dlarfg.v @@ -0,0 +1,51 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dlarfg(n int, alpha f64, mut x []f64, incx int) (f64, 
f64) { + if n < 0 { + panic(n_lt0) + } + if incx <= 0 { + panic(bad_inc_x) + } + + if n <= 1 { + return alpha, 0 + } + + if x.len < 1 + (n - 2) * math.abs(incx) { + panic(short_x) + } + + mut xnorm := blas.dnrm2(n - 1, x, incx) + if xnorm == 0 { + return alpha, 0 + } + mut beta := -math.copysign(dlapy2(alpha, xnorm), alpha) + safmin := dlamch_s / dlamch_e + mut knt := 0 + mut alpha_ := alpha + if math.abs(beta) < safmin { + // xnorm and beta may be inaccurate, scale x and recompute. + rsafmn := 1 / safmin + for { + knt++ + blas.dscal(n - 1, rsafmn, mut x, incx) + beta *= rsafmn + alpha_ *= rsafmn + if math.abs(beta) >= safmin { + break + } + } + xnorm = blas.dnrm2(n - 1, x, incx) + beta = -math.copysign(dlapy2(alpha_, xnorm), alpha_) + } + mut tau := (beta - alpha_) / beta + blas.dscal(n - 1, 1 / (alpha_ - beta), mut x, incx) + for _ in 0 .. knt { + beta *= safmin + } + return beta, tau +} diff --git a/lapack/lapack64/dlarft.v b/lapack/lapack64/dlarft.v new file mode 100644 index 000000000..e7ef8825a --- /dev/null +++ b/lapack/lapack64/dlarft.v @@ -0,0 +1,155 @@ +module lapack64 + +import math +import vsl.blas + +// dlarft forms the triangular factor T of a block reflector H, storing the answer +// in t. +// +// H = I - V * T * Vᵀ if store == .column_wise +// H = I - Vᵀ * T * V if store == .row_wise +// +// H is defined by a product of the elementary reflectors where +// +// H = H_0 * H_1 * ... * H_{k-1} if direct == .forward +// H = H_{k-1} * ... * H_1 * H_0 if direct == .backward +// +// t is a k×k triangular matrix. t is upper triangular if direct = .forward +// and lower triangular otherwise. This function will panic if t is not of +// sufficient size. +// +// store describes the storage of the elementary reflectors in v. See +// dlarfb for a description of layout. +// +// tau contains the scalar factors of the elementary reflectors H_i. +// +// dlarft is an internal routine. It is exported for testing purposes. 
+pub fn dlarft(direct Direct, store StoreV, n int, k int, v []f64, ldv int, tau []f64, mut t []f64, ldt int) { + mv, nv := if store == .row_wise { k, n } else { n, k } + if direct != .forward && direct != .backward { + panic(bad_direct) + } + if store != .row_wise && store != .column_wise { + panic(bad_store_v) + } + if n < 0 { + panic(n_lt0) + } + if k < 1 { + panic(k_lt1) + } + if ldv < math.max(1, nv) { + panic(bad_ld_v) + } + if tau.len < k { + panic(short_tau) + } + if ldt < math.max(1, k) { + panic(short_t) + } + + if n == 0 { + return + } + + if v.len < (mv - 1) * ldv + nv { + panic(short_v) + } + if t.len < (k - 1) * ldt + k { + panic(short_t) + } + + if direct == .forward { + mut prevlastv := n - 1 + for i := 0; i < k; i++ { + prevlastv = math.max(i, prevlastv) + if tau[i] == 0 { + for j := 0; j <= i; j++ { + t[j * ldt + i] = 0 + } + continue + } + mut lastv := 0 + if store == .column_wise { + // skip trailing zeros + for lastv = n - 1; lastv >= i + 1; lastv-- { + if v[lastv * ldv + i] != 0 { + break + } + } + for j := 0; j < i; j++ { + t[j * ldt + i] = -tau[i] * v[i * ldv + j] + } + j := math.min(lastv, prevlastv) + blas.dgemv(.trans, j - i, i, -tau[i], v[(i + 1) * ldv..], ldv, v[(i + 1) * ldv + i..], + ldv, 1.0, mut t[i..], ldt) + } else { + for lastv = n - 1; lastv >= i + 1; lastv-- { + if v[i * ldv + lastv] != 0 { + break + } + } + for j := 0; j < i; j++ { + t[j * ldt + i] = -tau[i] * v[j * ldv + i] + } + j := math.min(lastv, prevlastv) + blas.dgemv(.no_trans, i, j - i, -tau[i], v[i + 1..], ldv, v[i * ldv + i + 1..], + 1, 1.0, mut t[i..], ldt) + } + blas.dtrmv(.upper, .no_trans, .non_unit, i, t, ldt, mut t[i..], ldt) + t[i * ldt + i] = tau[i] + if i > 1 { + prevlastv = math.max(prevlastv, lastv) + } else { + prevlastv = lastv + } + } + return + } + + mut prevlastv := 0 + for i := k - 1; i >= 0; i-- { + if tau[i] == 0 { + for j := i; j < k; j++ { + t[j * ldt + i] = 0 + } + continue + } + mut lastv := 0 + if i < k - 1 { + if store == .column_wise { + for 
lastv = 0; lastv < i; lastv++ { + if v[lastv * ldv + i] != 0 { + break + } + } + for j := i + 1; j < k; j++ { + t[j * ldt + i] = -tau[i] * v[(n - k + i) * ldv + j] + } + j := math.max(lastv, prevlastv) + blas.dgemv(.trans, n - k + i - j, k - i - 1, -tau[i], v[j * ldv + i + 1..], + ldv, v[j * ldv + i..], ldv, 1.0, mut t[(i + 1) * ldt + i..], ldt) + } else { + for lastv = 0; lastv < i; lastv++ { + if v[i * ldv + lastv] != 0 { + break + } + } + for j := i + 1; j < k; j++ { + t[j * ldt + i] = -tau[i] * v[j * ldv + n - k + i] + } + j := math.max(lastv, prevlastv) + blas.dgemv(.no_trans, k - i - 1, n - k + i - j, -tau[i], v[(i + 1) * ldv + j..], + ldv, v[i * ldv + j..], 1, 1.0, mut t[(i + 1) * ldt + i..], ldt) + } + blas.dtrmv(.lower, .no_trans, .non_unit, k - i - 1, t[(i + 1) * ldt + i + 1..], + ldt, mut t[(i + 1) * ldt + i..], ldt) + if i > 0 { + prevlastv = math.min(prevlastv, lastv) + } else { + prevlastv = lastv + } + } + t[i * ldt + i] = tau[i] + } +} diff --git a/lapack/lapack64/dlartg.v b/lapack/lapack64/dlartg.v new file mode 100644 index 000000000..b4282092e --- /dev/null +++ b/lapack/lapack64/dlartg.v @@ -0,0 +1,56 @@ +module lapack64 + +import math + +// dlartg generates a plane rotation so that +// +// [ cs sn] * [f] = [r] +// [-sn cs] [g] = [0] +// +// where cs*cs + sn*sn = 1. +// +// This is a more accurate version of BLAS Drotg that uses scaling to avoid +// overflow or underflow, with the other differences that +// - cs >= 0 +// - if g = 0, then cs = 1 and sn = 0 +// - if f = 0 and g != 0, then cs = 0 and sn = sign(1,g) +// +// dlartg is an internal routine. It is exported for testing purposes. 
+pub fn dlartg(f f64, g f64) (f64, f64, f64) { + if g == 0 { + return 1, 0, f + } + + g1 := math.abs(g) + + if f == 0 { + return 0, math.copysign(1, g), g1 + } + + safmin := dlamch_s + safmax := 1 / safmin + rtmin := math.sqrt(safmin) + rtmax := math.sqrt(safmax / 2) + + f1 := math.abs(f) + + if rtmin < f1 && f1 < rtmax && rtmin < g1 && g1 < rtmax { + d := math.sqrt(f * f + g * g) + cs := f1 / d + r := math.copysign(d, f) + sn := g / r + + return cs, sn, r + } + + u := math.min(math.max(safmin, math.max(f1, g1)), safmax) + fs := f / u + gs := g / u + d := math.sqrt(fs * fs + gs * gs) + cs := math.abs(fs) / d + mut r := math.copysign(d, f) + sn := gs / r + r *= u + + return cs, sn, r +} diff --git a/lapack/lapack64/dlaset.v b/lapack/lapack64/dlaset.v new file mode 100644 index 000000000..0ad06dbc7 --- /dev/null +++ b/lapack/lapack64/dlaset.v @@ -0,0 +1,59 @@ +module lapack64 + +import vsl.blas +import math + +// dlaset sets the off-diagonal elements of A to alpha, and the diagonal +// elements to beta. If uplo == blas.upper, only the elements in the upper +// triangular part are set. If uplo == blas.lower, only the elements in the +// lower triangular part are set. If uplo is otherwise, all of the elements of A +// are set. +// +// dlaset is an internal routine. It is exported for testing purposes. +pub fn dlaset(uplo blas.Uplo, m int, n int, alpha f64, beta f64, mut a []f64, lda int) { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + minmn := math.min(m, n) + if minmn == 0 { + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + + match uplo { + .upper { + for i in 0 .. m { + for j in i + 1 .. n { + a[i * lda + j] = alpha + } + } + } + .lower { + for i in 0 .. m { + for j in 0 .. math.min(i, n) { + a[i * lda + j] = alpha + } + } + } + else { + for i in 0 .. m { + for j in 0 .. n { + a[i * lda + j] = alpha + } + } + } + } + for i in 0 .. 
minmn {
+		a[i * lda + i] = beta
+	}
+}
diff --git a/lapack/lapack64/dlasr.v b/lapack/lapack64/dlasr.v
new file mode 100644
index 000000000..8c9c6a900
--- /dev/null
+++ b/lapack/lapack64/dlasr.v
@@ -0,0 +1,277 @@
+module lapack64
+
+import math
+import vsl.blas
+
+// dlasr applies a sequence of plane rotations to the m×n matrix A. This series
+// of plane rotations is implicitly represented by a matrix P. P is multiplied
+// by a depending on the value of side -- A = P * A if side == Side.left,
+// A = A * Pᵀ if side == Side.right.
+//
+// The exact value of P depends on the value of pivot, but in all cases P is
+// implicitly represented by a series of 2×2 rotation matrices. The entries of
+// rotation matrix k are defined by s[k] and c[k]
+//
+//	R(k) = [ c[k] s[k]]
+//	       [-s[k] c[k]]
+//
+// If direct == Direct.forward, the rotation matrices are applied as
+// P = P(z-1) * ... * P(2) * P(1), while if direct == Direct.backward they are
+// applied as P = P(1) * P(2) * ... * P(z-1).
+//
+// pivot defines the mapping of the elements in R(k) to P(k).
+// If pivot == Pivot.variable, the rotation is performed for the (k, k+1) plane.
+//
+//	P(k) = [1                    ]
+//	       [    ...              ]
+//	       [         1           ]
+//	       [          c[k] s[k]  ]
+//	       [         -s[k] c[k]  ]
+//	       [                  1  ]
+//	       [                 ... ]
+//	       [                    1]
+//
+// if pivot == Pivot.top, the rotation is performed for the (1, k+1) plane,
+//
+//	P(k) = [c[k]        s[k]     ]
+//	       [     1               ]
+//	       [       ...           ]
+//	       [           1         ]
+//	       [-s[k]       c[k]     ]
+//	       [                 1   ]
+//	       [                  ...]
+//	       [                    1]
+//
+// and if pivot == Pivot.bottom, the rotation is performed for the (k, z) plane.
+//
+//	P(k) = [1                    ]
+//	       [  ...                ]
+//	       [      1              ]
+//	       [         c[k]    s[k]]
+//	       [              1      ]
+//	       [               ...   ]
+//	       [                  1  ]
+//	       [        -s[k]    c[k]]
+//
+// s and c have length m - 1 if side == Side.left, and n - 1 if side == Side.right.
+// +pub fn dlasr(side blas.Side, pivot Pivot, direct Direct, m int, n int, c []f64, s []f64, mut a []f64, lda int) { + if side != .left && side != .right { + panic(bad_side) + } + if pivot != .variable && pivot != .top && pivot != .bottom { + panic(bad_pivot) + } + if direct != .forward && direct != .backward { + panic(bad_direct) + } + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + // Quick return if possible. + if m == 0 || n == 0 { + return + } + + if side == .left { + if c.len < m - 1 { + panic(short_c) + } + if s.len < m - 1 { + panic(short_s) + } + } else { + if c.len < n - 1 { + panic(short_c) + } + if s.len < n - 1 { + panic(short_s) + } + } + if a.len < (m - 1) * lda + n { + panic(short_a) + } + + if side == .left { + if pivot == .variable { + if direct == .forward { + for j := 0; j < m - 1; j++ { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp2 := a[j * lda + i] + tmp := a[(j + 1) * lda + i] + a[(j + 1) * lda + i] = ctmp * tmp - stmp * tmp2 + a[j * lda + i] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } + for j := m - 2; j >= 0; j-- { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp2 := a[j * lda + i] + tmp := a[(j + 1) * lda + i] + a[(j + 1) * lda + i] = ctmp * tmp - stmp * tmp2 + a[j * lda + i] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } else if pivot == .top { + if direct == .forward { + for j := 1; j < m; j++ { + ctmp := c[j - 1] + stmp := s[j - 1] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp := a[j * lda + i] + tmp2 := a[i] + a[j * lda + i] = ctmp * tmp - stmp * tmp2 + a[i] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } + for j := m - 1; j >= 1; j-- { + ctmp := c[j - 1] + stmp := s[j - 1] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp := a[j * lda + i] + tmp2 := a[i] + a[j * lda + i] = ctmp * tmp - stmp * tmp2 + a[i] = stmp * tmp + ctmp * tmp2 
+ } + } + } + return + } + if direct == .forward { + for j := 0; j < m - 1; j++ { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp := a[j * lda + i] + tmp2 := a[(m - 1) * lda + i] + a[j * lda + i] = stmp * tmp2 + ctmp * tmp + a[(m - 1) * lda + i] = ctmp * tmp2 - stmp * tmp + } + } + } + return + } + for j := m - 2; j >= 0; j-- { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < n; i++ { + tmp := a[j * lda + i] + tmp2 := a[(m - 1) * lda + i] + a[j * lda + i] = stmp * tmp2 + ctmp * tmp + a[(m - 1) * lda + i] = ctmp * tmp2 - stmp * tmp + } + } + } + return + } + if pivot == .variable { + if direct == .forward { + for j := 0; j < n - 1; j++ { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < m; i++ { + tmp := a[i * lda + j + 1] + tmp2 := a[i * lda + j] + a[i * lda + j + 1] = ctmp * tmp - stmp * tmp2 + a[i * lda + j] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } + for j := n - 2; j >= 0; j-- { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < m; i++ { + tmp := a[i * lda + j + 1] + tmp2 := a[i * lda + j] + a[i * lda + j + 1] = ctmp * tmp - stmp * tmp2 + a[i * lda + j] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } else if pivot == .top { + if direct == .forward { + for j := 1; j < n; j++ { + ctmp := c[j - 1] + stmp := s[j - 1] + if ctmp != 1 || stmp != 0 { + for i := 0; i < m; i++ { + tmp := a[i * lda + j] + tmp2 := a[i * lda] + a[i * lda + j] = ctmp * tmp - stmp * tmp2 + a[i * lda] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } + for j := n - 1; j >= 1; j-- { + ctmp := c[j - 1] + stmp := s[j - 1] + if ctmp != 1 || stmp != 0 { + for i := 0; i < m; i++ { + tmp := a[i * lda + j] + tmp2 := a[i * lda] + a[i * lda + j] = ctmp * tmp - stmp * tmp2 + a[i * lda] = stmp * tmp + ctmp * tmp2 + } + } + } + return + } + if direct == .forward { + for j := 0; j < n - 1; j++ { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 
0; i < m; i++ { + tmp := a[i * lda + j] + tmp2 := a[i * lda + n - 1] + a[i * lda + j] = stmp * tmp2 + ctmp * tmp + a[i * lda + n - 1] = ctmp * tmp2 - stmp * tmp + } + } + } + return + } + for j := n - 2; j >= 0; j-- { + ctmp := c[j] + stmp := s[j] + if ctmp != 1 || stmp != 0 { + for i := 0; i < m; i++ { + tmp := a[i * lda + j] + tmp2 := a[i * lda + n - 1] + a[i * lda + j] = stmp * tmp2 + ctmp * tmp + a[i * lda + n - 1] = ctmp * tmp2 - stmp * tmp + } + } + } +} diff --git a/lapack/lapack64/dlatrd.v b/lapack/lapack64/dlatrd.v new file mode 100644 index 000000000..3469624d5 --- /dev/null +++ b/lapack/lapack64/dlatrd.v @@ -0,0 +1,108 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dlatrd(uplo blas.Uplo, n int, nb int, mut a []f64, lda int, mut e []f64, mut tau []f64, mut w []f64, ldw int) { + if uplo != .upper && uplo != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if nb < 0 { + panic(nb_lt0) + } + if nb > n { + panic(nb_gtn) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + if ldw < math.max(1, nb) { + panic(bad_ld_w) + } + + if n == 0 { + return + } + + if a.len < (n - 1) * lda + n { + panic(short_a) + } + if w.len < (n - 1) * ldw + nb { + panic(short_w) + } + if e.len < n - 1 { + panic(short_e) + } + if tau.len < n - 1 { + panic(short_tau) + } + + if uplo == .upper { + for i := n - 1; i >= n - nb; i-- { + iw := i - n + nb + if i < n - 1 { + // Update A(0:i, i). + blas.dgemv(.no_trans, i + 1, n - i - 1, -1, a[i + 1..], lda, w[i * ldw + iw + 1..], + 1, 1, mut a[i..], lda) + blas.dgemv(.no_trans, i + 1, n - i - 1, -1, w[iw + 1..], ldw, a[i * lda + i + 1..], + 1, 1, mut a[i..], lda) + } + if i > 0 { + // Generate elementary reflector H_i to annihilate A(0:i-2,i). + e[i - 1], tau[i - 1] = dlarfg(i, a[(i - 1) * lda + i], mut a[i..], lda) + a[(i - 1) * lda + i] = 1 + + // Compute W(0:i-1, i). 
+ blas.dsymv(.upper, i, 1, a, lda, a[i..], lda, 0, mut w[iw..], ldw) + if i < n - 1 { + blas.dgemv(.trans, i, n - i - 1, 1, w[iw + 1..], ldw, a[i..], lda, + 0, mut w[(i + 1) * ldw + iw..], ldw) + blas.dgemv(.no_trans, i, n - i - 1, -1, a[i + 1..], lda, w[(i + 1) * ldw + iw..], + ldw, 1, mut w[iw..], ldw) + blas.dgemv(.trans, i, n - i - 1, 1, a[i + 1..], lda, a[i..], lda, + 0, mut w[(i + 1) * ldw + iw..], ldw) + blas.dgemv(.no_trans, i, n - i - 1, -1, w[iw + 1..], ldw, w[(i + 1) * ldw + iw..], + ldw, 1, mut w[iw..], ldw) + } + blas.dscal(i, tau[i - 1], mut w[iw..], ldw) + alpha := -0.5 * tau[i - 1] * blas.ddot(i, w[iw..], ldw, a[i..], lda) + blas.daxpy(i, alpha, a[i..], lda, mut w[iw..], ldw) + } + } + } else { + // Reduce first nb columns of lower triangle. + for i := 0; i < nb; i++ { + // Update A(i:n, i) + blas.dgemv(.no_trans, n - i, i, -1, a[i * lda..], lda, w[i * ldw..], 1, 1, mut + a[i * lda + i..], lda) + blas.dgemv(.no_trans, n - i, i, -1, w[i * ldw..], ldw, a[i * lda..], 1, 1, mut + a[i * lda + i..], lda) + if i < n - 1 { + // Generate elementary reflector H_i to annihilate A(i+2:n,i). + e[i], tau[i] = dlarfg(n - i - 1, a[(i + 1) * lda + i], mut a[math.min(i + + 2, n - 1) * lda + i..], lda) + a[(i + 1) * lda + i] = 1 + + // Compute W(i+1:n,i). 
+ blas.dsymv(.lower, n - i - 1, 1, a[(i + 1) * lda + i + 1..], lda, a[(i + 1) * lda + + i..], lda, 0, mut w[(i + 1) * ldw + i..], ldw) + blas.dgemv(.trans, n - i - 1, i, 1, w[(i + 1) * ldw..], ldw, a[(i + 1) * lda + i..], + lda, 0, mut w[i..], ldw) + blas.dgemv(.no_trans, n - i - 1, i, -1, a[(i + 1) * lda..], lda, w[i..], + ldw, 1, mut w[(i + 1) * ldw + i..], ldw) + blas.dgemv(.trans, n - i - 1, i, 1, a[(i + 1) * lda..], lda, a[(i + 1) * lda + i..], + lda, 0, mut w[i..], ldw) + blas.dgemv(.no_trans, n - i - 1, i, -1, w[(i + 1) * ldw..], ldw, w[i..], + ldw, 1, mut w[(i + 1) * ldw + i..], ldw) + blas.dscal(n - i - 1, tau[i], mut w[(i + 1) * ldw + i..], ldw) + alpha := -0.5 * tau[i] * blas.ddot(n - i - 1, w[(i + 1) * ldw + i..], + ldw, a[(i + 1) * lda + i..], lda) + blas.daxpy(n - i - 1, alpha, a[(i + 1) * lda + i..], lda, mut w[(i + 1) * ldw + i..], + ldw) + } + } + } +} diff --git a/lapack/lapack64/dorg2l.v b/lapack/lapack64/dorg2l.v index 2c2070c79..c11963980 100644 --- a/lapack/lapack64/dorg2l.v +++ b/lapack/lapack64/dorg2l.v @@ -64,7 +64,7 @@ pub fn dorg2l(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f // Apply H_i to A[0:m-k+i, 0:n-k+i] from the left. 
a[(m - n + ii) * lda + ii] = 1 - dlarf(.left, m - n + ii + 1, ii, mut a[ii..], lda, tau[i], mut a, lda, mut work) + dlarf(.left, m - n + ii + 1, ii, a[ii..], lda, tau[i], mut a, lda, mut work) blas.dscal(m - n + ii, -tau[i], mut a[ii..], lda) a[(m - n + ii) * lda + ii] = 1 - tau[i] diff --git a/lapack/lapack64/dorg2r.v b/lapack/lapack64/dorg2r.v new file mode 100644 index 000000000..6947b7fb4 --- /dev/null +++ b/lapack/lapack64/dorg2r.v @@ -0,0 +1,66 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dorg2r(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f64) { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if n > m { + panic(n_gtm) + } + if k < 0 { + panic(k_lt0) + } + if k > n { + panic(k_gtn) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + if n == 0 { + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + if tau.len != k { + panic(bad_len_tau) + } + if work.len < n { + panic(short_work) + } + + // Initialize columns k+1:n to columns of the unit matrix. + for l := 0; l < m; l++ { + for j := k; j < n; j++ { + a[l * lda + j] = 0.0 + } + } + for j := k; j < n; j++ { + a[j * lda + j] = 1.0 + } + for i := k - 1; i >= 0; i-- { + for mut elem in work { + elem = 0.0 + } + if i < n - 1 { + a[i * lda + i] = 1.0 + dlarf(.left, m - i, n - i - 1, a[(i * lda + i)..], lda, tau[i], mut a[(i * lda + i + 1)..], + lda, mut work) + } + if i < m - 1 { + blas.dscal(m - i - 1, -tau[i], mut a[(i + 1) * lda + i..], lda) + } + a[i * lda + i] = 1.0 - tau[i] + for l := 0; l < i; l++ { + a[l * lda + i] = 0.0 + } + } +} diff --git a/lapack/lapack64/dorgql.v b/lapack/lapack64/dorgql.v index b16636e57..92b28cf5a 100644 --- a/lapack/lapack64/dorgql.v +++ b/lapack/lapack64/dorgql.v @@ -116,8 +116,8 @@ pub fn dorgql(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f if n - k + i > 0 { // Form the triangular factor of the block reflector // H = H_{i+ib-1} * ... * H_{i+1} * H_i. 
- dlarft(.backward, .column_wise, m - k + i + ib, ib, mut a[n - k + i..], - lda, tau[i..], mut work, ldwork) + dlarft(.backward, .column_wise, m - k + i + ib, ib, a[n - k + i..], lda, + tau[i..], mut work, ldwork) // Apply H to A[0:m-k+i+ib, 0:n-k+i] from the left. dlarfb(.left, .no_trans, .backward, .column_wise, m - k + i + ib, n - k + i, diff --git a/lapack/lapack64/dorgqr.v b/lapack/lapack64/dorgqr.v new file mode 100644 index 000000000..d2d14eb51 --- /dev/null +++ b/lapack/lapack64/dorgqr.v @@ -0,0 +1,104 @@ +module lapack64 + +import math +import vsl.blas + +pub fn dorgqr(m int, n int, k int, mut a []f64, lda int, tau []f64, mut work []f64, lwork int) { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if n > m { + panic(n_gtm) + } + if k < 0 { + panic(k_lt0) + } + if k > n { + panic(k_gtn) + } + if lda < math.max(1, n) && lwork != -1 { + panic(bad_ld_a) + } + if lwork < math.max(1, n) && lwork != -1 { + panic(bad_l_work) + } + if work.len < math.max(1, lwork) { + panic(short_work) + } + + if n == 0 { + work[0] = 1.0 + return + } + + mut nb := ilaenv(1, 'DORGQR', ' ', m, n, k, -1) + if lwork == -1 { + work[0] = f64(n * nb) + return + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + if tau.len != k { + panic(bad_len_tau) + } + + mut nbmin := 2 + mut nx := 0 + mut iws := n + mut ldwork := 0 + if 1 < nb && nb < k { + nx = math.max(0, ilaenv(3, 'DORGQR', ' ', m, n, k, -1)) + if nx < k { + ldwork = nb + iws = n * ldwork + if lwork < iws { + nb = lwork / n + ldwork = nb + nbmin = math.max(2, ilaenv(2, 'DORGQR', ' ', m, n, k, -1)) + } + } + } + mut ki := 0 + mut kk := 0 + if nbmin <= nb && nb < k && nx < k { + ki = ((k - nx - 1) / nb) * nb + kk = math.min(k, ki + nb) + for i := 0; i < kk; i++ { + for j := kk; j < n; j++ { + unsafe { + a[i * lda + j] = 0.0 + } + } + } + } + if kk < n { + dorg2r(m - kk, n - kk, k - kk, mut a[(kk * lda + kk)..], lda, tau[kk..], mut work) + } + if kk > 0 { + for i := ki; i >= 0; i -= nb { + ib := math.min(nb, 
k - i) + if i + ib < n { + dlarft(.forward, .column_wise, m - i, ib, a[(i * lda + i)..], lda, tau[i..], mut + work, ldwork) + + dlarfb(.left, .no_trans, .forward, .column_wise, m - i, n - i - ib, ib, + a[(i * lda + i)..], lda, work, ldwork, mut a[(i * lda + i + ib)..], + lda, mut work[(ib * ldwork)..], ldwork) + } + dorg2r(m - i, ib, ib, mut a[(i * lda + i)..], lda, tau[i..(i + ib)], mut work) + for j := i; j < i + ib; j++ { + for l := 0; l < i; l++ { + unsafe { + a[l * lda + j] = 0.0 + } + } + } + } + } + work[0] = f64(iws) +} diff --git a/lapack/lapack64/dsteqr.v b/lapack/lapack64/dsteqr.v new file mode 100644 index 000000000..ce61a1f63 --- /dev/null +++ b/lapack/lapack64/dsteqr.v @@ -0,0 +1,375 @@ +module lapack64 + +import math +import vsl.blas + +// dsteqr computes the eigenvalues and optionally the eigenvectors of a symmetric +// tridiagonal matrix using the implicit QL or QR method. The eigenvectors of a +// full or band symmetric matrix can also be found if dsytrd, dsptrd, or dsbtrd +// have been used to reduce this matrix to tridiagonal form. +// +// d, on entry, contains the diagonal elements of the tridiagonal matrix. On exit, +// d contains the eigenvalues in ascending order. d must have length n and +// dsteqr will panic otherwise. +// +// e, on entry, contains the off-diagonal elements of the tridiagonal matrix on +// entry, and is overwritten during the call to dsteqr. e must have length n-1 and +// dsteqr will panic otherwise. +// +// z, on entry, contains the n×n orthogonal matrix used in the reduction to +// tridiagonal form if compz == lapack.EVOrig. On exit, if +// compz == lapack.EVOrig, z contains the orthonormal eigenvectors of the +// original symmetric matrix, and if compz == lapack.EVTridiag, z contains the +// orthonormal eigenvectors of the symmetric tridiagonal matrix. z is not used +// if compz == lapack.EVCompNone. 
+// +// work must have length at least max(1, 2*n-2) if the eigenvectors are computed, +// and dsteqr will panic otherwise. +// +// dsteqr is an internal routine. It is exported for testing purposes. +pub fn dsteqr(compz EVComp, n int, mut d []f64, mut e []f64, mut z []f64, ldz int, mut work []f64) bool { + if compz != .ev_comp_none && compz != .ev_tridiag && compz != .ev_orig { + panic('bad_ev_comp') + } + if n < 0 { + panic('n < 0') + } + if ldz < 1 || (compz != .ev_comp_none && ldz < n) { + panic('bad_ldz') + } + + // Quick return if possible. + if n == 0 { + return true + } + + if d.len < n { + panic('short d') + } + if e.len < n - 1 { + panic('short e') + } + if compz != .ev_comp_none && z.len < (n - 1) * ldz + n { + panic('short z') + } + if compz != .ev_comp_none && work.len < math.max(1, 2 * n - 2) { + panic('short work') + } + + mut icompz := 0 + if compz == .ev_orig { + icompz = 1 + } else if compz == .ev_tridiag { + icompz = 2 + } + + if n == 1 { + if icompz == 2 { + z[0] = 1 + } + return true + } + + eps := dlamch_e + eps2 := eps * eps + safmin := dlamch_s + safmax := 1 / safmin + ssfmax := math.sqrt(safmax) / 3 + ssfmin := math.sqrt(safmin) / eps2 + + // Compute the eigenvalues and eigenvectors of the tridiagonal matrix. + if icompz == 2 { + dlaset(.all, n, n, 0, 1, mut z, ldz) + } + maxit := 30 + nmaxit := n * maxit + + mut jtot := 0 + + // Determine where the matrix splits and choose QL or QR iteration for each + // block, according to whether top or bottom diagonal element is smaller. + mut l1 := 0 + nm1 := n - 1 + + down := 1 + up := 2 + mut iscale := 0 + + for { + if l1 > n - 1 { + // Order eigenvalues and eigenvectors. 
+ if icompz == 0 { + dlasrt(.sort_increasing, n, mut d) + } else { + for ii := 1; ii < n; ii++ { + i := ii - 1 + mut k := i + mut p := d[i] + for j := ii; j < n; j++ { + if d[j] < p { + k = j + p = d[j] + } + } + if k != i { + d[k] = d[i] + d[i] = p + blas.dswap(n, mut z[i..], ldz, mut z[k..], ldz) + } + } + } + return true + } + if l1 > 0 { + e[l1 - 1] = 0 + } + mut m := 0 + if l1 <= nm1 { + for m = l1; m < nm1; m++ { + test := math.abs(e[m]) + if test == 0 { + break + } + if test <= (math.sqrt(math.abs(d[m])) * math.sqrt(math.abs(d[m + 1]))) * eps { + e[m] = 0 + break + } + } + } + mut l := l1 + lsv := l + mut lend := m + lendsv := lend + l1 = m + 1 + if lend == l { + continue + } + + // Scale submatrix in rows and columns L to Lend + anorm := dlanst(.max_abs, lend - l + 1, d[l..], e[l..]) + match anorm { + 0 { + continue + } + ssfmax { + iscale = down + // Pretend that d and e are matrices with 1 column. + dlascl(.general, 0, 0, anorm, ssfmax, lend - l + 1, 1, mut d[l..], 1) + dlascl(.general, 0, 0, anorm, ssfmax, lend - l, 1, mut e[l..], 1) + } + ssfmin { + iscale = up + dlascl(.general, 0, 0, anorm, ssfmin, lend - l + 1, 1, mut d[l..], 1) + dlascl(.general, 0, 0, anorm, ssfmin, lend - l, 1, mut e[l..], 1) + } + else {} + } + + // Choose between QL and QR. + if math.abs(d[lend]) < math.abs(d[l]) { + lend = lsv + l = lendsv + } + if lend > l { + // QL Iteration. Look for small subdiagonal element. + for { + if l != lend { + for m = l; m < lend; m++ { + v := math.abs(e[m]) + if v * v <= (eps2 * math.abs(d[m])) * math.abs(d[m + 1]) + safmin { + break + } + } + } else { + m = lend + } + if m < lend { + e[m] = 0 + } + mut p := d[l] + if m == l { + // Eigenvalue found. + l++ + if l > lend { + break + } + continue + } + + // If remaining matrix is 2×2, use dlaev2 to compute its eigensystem. 
+ if m == l + 1 { + if icompz > 0 { + d[l], d[l + 1], work[l], work[n - 1 + l] = dlaev2(d[l], e[l], + d[l + 1]) + dlasr(.right, .variable, .backward, n, 2, work[l..], work[n - 1 + l..], mut + z[l..], ldz) + } else { + d[l], d[l + 1] = dlae2(d[l], e[l], d[l + 1]) + } + e[l] = 0 + l += 2 + if l > lend { + break + } + continue + } + + if jtot == nmaxit { + break + } + jtot++ + + // Form shift + mut g := (d[l + 1] - p) / (2 * e[l]) + mut r := dlapy2(g, 1) + g = d[m] - p + e[l] / (g + math.copysign(r, g)) + mut s := 1.0 + mut c := 1.0 + p = 0.0 + + // Inner loop + for i := m - 1; i >= l; i-- { + f := s * e[i] + b := c * e[i] + c, s, r = dlartg(g, f) + if i != m - 1 { + e[i + 1] = r + } + g = d[i + 1] - p + r = (d[i] - g) * s + 2 * c * b + p = s * r + d[i + 1] = g + p + g = c * r - b + + // If eigenvectors are desired, then save rotations. + if icompz > 0 { + work[i] = c + work[n - 1 + i] = -s + } + } + // If eigenvectors are desired, then apply saved rotations. + if icompz > 0 { + mm := m - l + 1 + dlasr(.right, .variable, .backward, n, mm, work[l..], work[n - 1 + l..], mut + z[l..], ldz) + } + d[l] -= p + e[l] = g + } + } else { + // QR Iteration. + // Look for small superdiagonal element. + for { + if l != lend { + for m = l; m > lend; m-- { + v := math.abs(e[m - 1]) + if v * v <= (eps2 * math.abs(d[m]) * math.abs(d[m - 1]) + safmin) { + break + } + } + } else { + m = lend + } + if m > lend { + e[m - 1] = 0 + } + mut p := d[l] + if m == l { + // Eigenvalue found + l-- + if l < lend { + break + } + continue + } + + // If remaining matrix is 2×2, use dlae2 to compute its eigenvalues. 
+ if m == l - 1 { + if icompz > 0 { + d[l - 1], d[l], work[m], work[n - 1 + m] = dlaev2(d[l - 1], e[l - 1], + d[l]) + dlasr(.right, .variable, .forward, n, 2, work[m..], work[n - 1 + m..], mut + z[l - 1..], ldz) + } else { + d[l - 1], d[l] = dlae2(d[l - 1], e[l - 1], d[l]) + } + e[l - 1] = 0 + l -= 2 + if l < lend { + break + } + continue + } + if jtot == nmaxit { + break + } + jtot++ + + // Form shift. + mut g := (d[l - 1] - p) / (2 * e[l - 1]) + mut r := dlapy2(g, 1) + g = d[m] - p + (e[l - 1]) / (g + math.copysign(r, g)) + mut s := 1.0 + mut c := 1.0 + p = 0.0 + + // Inner loop. + for i := m; i < l; i++ { + f := s * e[i] + b := c * e[i] + c, s, r = dlartg(g, f) + if i != m { + e[i - 1] = r + } + g = d[i] - p + r = (d[i + 1] - g) * s + 2 * c * b + p = s * r + d[i] = g + p + g = c * r - b + + // If eigenvectors are desired, then save rotations. + if icompz > 0 { + work[i] = c + work[n - 1 + i] = s + } + } + + // If eigenvectors are desired, then apply saved rotations. + if icompz > 0 { + mm := l - m + 1 + dlasr(.right, .variable, .forward, n, mm, work[m..], work[n - 1 + m..], mut + z[m..], ldz) + } + d[l] -= p + e[l - 1] = g + } + } + + // Undo scaling if necessary. + match iscale { + down { + // Pretend that d and e are matrices with 1 column. + dlascl(.general, 0, 0, ssfmax, anorm, lendsv - lsv + 1, 1, mut d[lsv..], + 1) + dlascl(.general, 0, 0, ssfmax, anorm, lendsv - lsv, 1, mut e[lsv..], 1) + } + up { + dlascl(.general, 0, 0, ssfmin, anorm, lendsv - lsv + 1, 1, mut d[lsv..], + 1) + dlascl(.general, 0, 0, ssfmin, anorm, lendsv - lsv, 1, mut e[lsv..], 1) + } + else {} + } + + // Check for no convergence to an eigenvalue after a total of n*maxit iterations. 
+ if jtot >= nmaxit { + break + } + } + for i := 0; i < n - 1; i++ { + if e[i] != 0 { + return false + } + } + return true +} diff --git a/lapack/lapack64/dsyev.v b/lapack/lapack64/dsyev.v index a65f019fa..12cf3638b 100644 --- a/lapack/lapack64/dsyev.v +++ b/lapack/lapack64/dsyev.v @@ -88,8 +88,8 @@ pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, mut w []f6 panic('Dsterf failed') } } else { - dorgtr(uplo, n, mut a, lda, mut work[indtau..], mut work[indwork..], llwork) - if !dsteqr(EvComp(jobz), n, mut w, mut work[inde..], mut a, lda, mut work[indtau..]) { + dorgtr(uplo, n, mut a, lda, work[indtau..], mut work[indwork..], llwork) + if !dsteqr(EVComp(jobz), n, mut w, mut work[inde..], mut a, lda, mut work[indtau..]) { panic('Dsteqr failed') } } diff --git a/lapack/lapack64/dsytd2.v b/lapack/lapack64/dsytd2.v new file mode 100644 index 000000000..485440d7e --- /dev/null +++ b/lapack/lapack64/dsytd2.v @@ -0,0 +1,144 @@ +module lapack64 + +import math +import vsl.blas + +// Dsytd2 reduces a symmetric n×n matrix A to symmetric tridiagonal form T by +// an orthogonal similarity transformation +// +// Qᵀ * A * Q = T +// +// On entry, the matrix is contained in the specified triangle of a. On exit, +// if uplo == Uplo.upper, the diagonal and first super-diagonal of a are +// overwritten with the elements of T. The elements above the first super-diagonal +// are overwritten with the elementary reflectors that are used with +// the elements written to tau in order to construct Q. If uplo == Uplo.lower, +// the elements are written in the lower triangular region. +// +// d must have length at least n. e and tau must have length at least n-1. Dsytd2 +// will panic if these sizes are not met. +// +// Q is represented as a product of elementary reflectors. +// If uplo == Uplo.upper +// +// Q = H_{n-2} * ... * H_1 * H_0 +// +// and if uplo == Uplo.lower +// +// Q = H_0 * H_1 * ... 
* H_{n-2} +// +// where +// +// H_i = I - tau * v * vᵀ +// +// where tau is stored in tau[i], and v is stored in a. +// +// If uplo == Uplo.upper, v[0:i-1] is stored in A[0:i-1,i+1], v[i] = 1, and +// v[i+1:] = 0. The elements of a are +// +// [ d e v2 v3 v4] +// [ d e v3 v4] +// [ d e v4] +// [ d e] +// [ d] +// +// If uplo == Uplo.lower, v[0:i+1] = 0, v[i+1] = 1, and v[i+2:] is stored in +// A[i+2:n,i]. +// The elements of a are +// +// [ d ] +// [ e d ] +// [v1 e d ] +// [v1 v2 e d ] +// [v1 v2 v3 e d] +// +pub fn dsytd2(uplo blas.Uplo, n int, mut a []f64, lda int, mut d []f64, mut e []f64, mut tau []f64) { + if uplo != .upper && uplo != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + // Quick return if possible. + if n == 0 { + return + } + + if a.len < (n - 1) * lda + n { + panic(short_a) + } + if d.len < n { + panic(short_d) + } + if e.len < n - 1 { + panic(short_e) + } + if tau.len < n - 1 { + panic(short_tau) + } + + if uplo == .upper { + // Reduce the upper triangle of A. + for i := n - 2; i >= 0; i-- { + // Generate elementary reflector H_i = I - tau * v * vᵀ to + // annihilate A[i:i-1, i+1]. + taui, _ := dlarfg(i + 1, a[i * lda + i + 1], mut a[0 + i + 1..], lda) + e[i] = a[i * lda + i + 1] + if taui != 0.0 { + // Apply H_i from both sides to A[0:i,0:i]. + a[i * lda + i + 1] = 1.0 + + // Compute x := tau * A * v storing x in tau[0:i]. + blas.dsymv(.upper, i + 1, taui, a, lda, a[i + 1..], lda, 0, mut tau, 1) + + // Compute w := x - 1/2 * tau * (xᵀ * v) * v. + alpha := -0.5 * taui * blas.ddot(i + 1, tau, 1, a[i + 1..], lda) + blas.daxpy(i + 1, alpha, a[i + 1..], lda, mut tau, 1) + + // Apply the transformation as a rank-2 update + // A = A - v * wᵀ - w * vᵀ. + blas.dsyr2(.upper, i + 1, -1.0, a[i + 1..], lda, tau, 1, mut a, lda) + a[i * lda + i + 1] = e[i] + } + d[i + 1] = a[(i + 1) * lda + i + 1] + tau[i] = taui + } + d[0] = a[0] + } else { + // Reduce the lower triangle of A. 
+ for i := 0; i < n - 1; i++ { + // Generate elementary reflector H_i = I - tau * v * vᵀ to + // annihilate A[i+2:n, i]. + taui, _ := dlarfg(n - i - 1, a[(i + 1) * lda + i], mut a[math.min(i + 2, n - 1) * lda + + i..], lda) + e[i] = a[(i + 1) * lda + i] + if taui != 0.0 { + // Apply H_i from both sides to A[i+1:n, i+1:n]. + a[(i + 1) * lda + i] = 1.0 + + // Compute x := tau * A * v, storing y in tau[i:n-1]. + blas.dsymv(.lower, n - i - 1, taui, a[(i + 1) * lda + i + 1..], lda, a[(i + + 1) * lda + i..], lda, 0, mut tau[i..], 1) + + // Compute w := x - 1/2 * tau * (xᵀ * v) * v. + alpha := -0.5 * taui * blas.ddot(n - i - 1, tau[i..], 1, a[(i + 1) * lda + i..], + lda) + blas.daxpy(n - i - 1, alpha, a[(i + 1) * lda + i..], lda, mut tau[i..], + 1) + + // Apply the transformation as a rank-2 update + // A = A - v * wᵀ - w * vᵀ. + blas.dsyr2(.lower, n - i - 1, -1.0, a[(i + 1) * lda + i..], lda, tau[i..], + 1, mut a[(i + 1) * lda + i + 1..], lda) + a[(i + 1) * lda + i] = e[i] + } + d[i] = a[i * lda + i] + tau[i] = taui + } + d[n - 1] = a[(n - 1) * lda + n - 1] + } +} diff --git a/lapack/lapack64/errors.v b/lapack/lapack64/errors.v index 91a96995f..642ed523a 100644 --- a/lapack/lapack64/errors.v +++ b/lapack/lapack64/errors.v @@ -173,6 +173,6 @@ pub const bad_ld_z = 'lapack: bad leading dimension of Z' // Panic strings for bad vector increments. pub const abs_inc_not_one = 'lapack: increment not one or negative one' -pub const bad_inc_x = 'lapack: incXpub const <= 0' -pub const bad_inc_y = 'lapack: incYpub const <= 0' -pub const zero_inc_v = 'lapack:pub const incvpub const == 0' +pub const bad_inc_x = 'lapack: incx <= 0' +pub const bad_inc_y = 'lapack: incy <= 0' +pub const zero_inc_v = 'lapack: incv == 0' diff --git a/lapack/lapack64/iladlc.v b/lapack/lapack64/iladlc.v new file mode 100644 index 000000000..893509b1a --- /dev/null +++ b/lapack/lapack64/iladlc.v @@ -0,0 +1,42 @@ +module lapack64 + +import math + +// iladlc scans a matrix for its last non-zero column. 
Returns -1 if the matrix +// is all zeros. +pub fn iladlc(m int, n int, a []f64, lda int) int { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + if n == 0 || m == 0 { + return -1 + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + + // Test common case where corner is non-zero. + if a[n - 1] != 0 || a[(m - 1) * lda + (n - 1)] != 0 { + return n - 1 + } + + // Scan each row tracking the highest column seen. + mut highest := -1 + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + if a[i * lda + j] != 0 { + highest = math.max(highest, j) + break + } + } + } + return highest +} diff --git a/lapack/lapack64/iladlr.v b/lapack/lapack64/iladlr.v new file mode 100644 index 000000000..a412219f2 --- /dev/null +++ b/lapack/lapack64/iladlr.v @@ -0,0 +1,38 @@ +module lapack64 + +import math + +// iladlr scans a matrix for its last non-zero row. Returns -1 if the matrix +// is all zeros. +pub fn iladlr(m int, n int, a []f64, lda int) int { + if m < 0 { + panic(m_lt0) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + if n == 0 || m == 0 { + return -1 + } + + if a.len < (m - 1) * lda + n { + panic(short_a) + } + + // Check the common case where the corner is non-zero + if a[(m - 1) * lda] != 0 || a[(m - 1) * lda + n - 1] != 0 { + return m - 1 + } + for i := m - 1; i >= 0; i-- { + for j := 0; j < n; j++ { + if a[i * lda + j] != 0 { + return i + } + } + } + return -1 +} From e83de293d661b7036e276105c5cc2ac10a900964 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 04:06:14 -0300 Subject: [PATCH 28/33] refactor: Update LAPACK functions in lapack_notd_vsl_lapack_lapacke.v to handle errors and use named constants --- lapack/cflags_notd_vsl_lapack_lapacke.v | 14 -------------- lapack/lapack_notd_vsl_lapack_lapacke.v | 8 ++++---- 2 files changed, 4 insertions(+), 18 deletions(-) delete mode 100644 lapack/cflags_notd_vsl_lapack_lapacke.v diff 
--git a/lapack/cflags_notd_vsl_lapack_lapacke.v b/lapack/cflags_notd_vsl_lapack_lapacke.v deleted file mode 100644 index 081f788db..000000000 --- a/lapack/cflags_notd_vsl_lapack_lapacke.v +++ /dev/null @@ -1,14 +0,0 @@ -module lapack - -#flag linux -O2 -I/usr/local/include -I/usr/lib -#flag linux -L/usr/local/lib -L/usr/lib -#flag windows -O2 -#flag windows -lgfortran -// Intel, M1 brew, and MacPorts -#flag darwin -L/usr/local/opt/lapack/lib -L/opt/homebrew/opt/lapack/lib -L/opt/local/opt/lapack/lib -#flag -I@VMODROOT -#flag -llapacke - -$if macos { - #include -} diff --git a/lapack/lapack_notd_vsl_lapack_lapacke.v b/lapack/lapack_notd_vsl_lapack_lapacke.v index e16020046..e46de9120 100644 --- a/lapack/lapack_notd_vsl_lapack_lapacke.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -55,7 +55,7 @@ pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s [ info := lapack64.dgesvd(jobu, jobvt, m, n, mut a, lda, s, mut u, ldu, mut vt, ldvt, superb) if info != 0 { - errors.vsl_panic('lapack failed', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) } } @@ -90,7 +90,7 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { info := lapack64.dgetri(n, mut a, lda, mut ipiv) if info != 0 { - errors.vsl_panic('lapack failed', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) } } @@ -114,7 +114,7 @@ pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { info := lapack64.dpotrf(uplo, n, mut a, lda) if info != 0 { - errors.vsl_panic('lapack failed', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) } } @@ -163,6 +163,6 @@ pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, info := lapack64.dgeev(calc_vl, calc_vr, n, mut a, lda, wr, wi, mut vl, ldvl, mut vr, ldvr) if info != 0 { - 
errors.vsl_panic('lapack failed', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) } } From b06e436dc5812c305c671776b6b1092b8850e6c9 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 04:10:48 -0300 Subject: [PATCH 29/33] refactor: Update ci.yml to execute tests using Pure V Backend --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1cdfa15e6..2761553b2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,8 +64,8 @@ jobs: - name: Move VSL source code to V Modules run: mv ./vsl ~/.vmodules - # - name: Execute Tests using Pure V Backend - # run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} + - name: Execute Tests using Pure V Backend + run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} - name: Execute Tests using Pure V Backend with Pure C Backend run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas --use-lapacke From 36ae80e00f12ae032447678cc1401af4d0751454 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 04:13:54 -0300 Subject: [PATCH 30/33] refactor: Update ci.yml to execute tests using Pure V Backend with CBLAS and LAPACKE --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2761553b2..34dc5c559 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,7 +65,7 @@ jobs: run: mv ./vsl ~/.vmodules - name: Execute Tests using Pure V Backend - run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} + run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas - name: Execute Tests using Pure V Backend with Pure C Backend run: ~/.vmodules/vsl/bin/test ${{ matrix.flags }} --use-cblas --use-lapacke From 63adcf6a867277f3a0bb6c892e074b1eb506904b Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 13:05:56 -0300 Subject: [PATCH 31/33] 
refactor: Update ci.yml to execute tests using Pure V Backend with CBLAS and LAPACKE --- lapack/conversions.v | 24 ++++++++++----------- lapack/lapack64/conversions.v | 28 ++++++++++++------------- lapack/lapack64/dgeev.v | 2 +- lapack/lapack64/dgesvd.v | 2 +- lapack/lapack64/dsteqr.v | 10 ++++----- lapack/lapack64/dsyev.v | 5 +++-- lapack/lapack64/errors.v | 14 ++++++------- lapack/lapack64/ilaenv.v | 4 ++-- lapack/lapack_d_vsl_lapack_lapacke.v | 10 ++++----- lapack/lapack_notd_vsl_lapack_lapacke.v | 14 ++++++------- 10 files changed, 57 insertions(+), 56 deletions(-) diff --git a/lapack/conversions.v b/lapack/conversions.v index fcb1cb239..c58237c56 100644 --- a/lapack/conversions.v +++ b/lapack/conversions.v @@ -32,17 +32,17 @@ pub type SVDJob = lapack64.SVDJob // GSVDJob specifies the singular vector computation type for Generalized SVD. pub type GSVDJob = lapack64.GSVDJob -// EVComp specifies how eigenvectors are computed in Dsteqr. -pub type EVComp = lapack64.EVComp +// EigenVectorsComp specifies how eigenvectors are computed in Dsteqr. +pub type EigenVectorsComp = lapack64.EigenVectorsComp -// EVJob specifies whether eigenvectors are computed in Dsyev. -pub type EVJob = lapack64.EVJob +// EigenVectorsJob specifies whether eigenvectors are computed in Dsyev. +pub type EigenVectorsJob = lapack64.EigenVectorsJob -// LeftEVJob specifies whether left eigenvectors are computed in Dgeev. -pub type LeftEVJob = lapack64.LeftEVJob +// LeftEigenVectorsJob specifies whether left eigenvectors are computed in Dgeev. +pub type LeftEigenVectorsJob = lapack64.LeftEigenVectorsJob -// RightEVJob specifies whether right eigenvectors are computed in Dgeev. -pub type RightEVJob = lapack64.RightEVJob +// RightEigenVectorsJob specifies whether right eigenvectors are computed in Dgeev. +pub type RightEigenVectorsJob = lapack64.RightEigenVectorsJob // BalanceJob specifies matrix balancing operation. 
pub type BalanceJob = lapack64.BalanceJob @@ -56,11 +56,11 @@ pub type SchurComp = lapack64.SchurComp // UpdateSchurComp specifies whether the matrix of Schur vectors is updated in Dtrexc. pub type UpdateSchurComp = lapack64.UpdateSchurComp -// EVSide specifies what eigenvectors are computed in Dtrevc3. -pub type EVSide = lapack64.EVSide +// EigenVectorsSide specifies what eigenvectors are computed in Dtrevc3. +pub type EigenVectorsSide = lapack64.EigenVectorsSide -// EVHowMany specifies which eigenvectors are computed in Dtrevc3 and how. -pub type EVHowMany = lapack64.EVHowMany +// EigenVectorsHowMany specifies which eigenvectors are computed in Dtrevc3 and how. +pub type EigenVectorsHowMany = lapack64.EigenVectorsHowMany // MaximizeNormXJob specifies the heuristic method for computing a contribution to // the reciprocal Dif-estimate in Dlatdf. diff --git a/lapack/lapack64/conversions.v b/lapack/lapack64/conversions.v index 5052633f9..dbb33db5d 100644 --- a/lapack/lapack64/conversions.v +++ b/lapack/lapack64/conversions.v @@ -67,7 +67,7 @@ pub enum GenOrtho as u8 { generate_q = u8(`Q`) } -// SVDJob specifies the singular vector computation type for SVD. +// SVDJob specifies the singular vector computation type for SingularValueDecomposition. pub enum SVDJob as u8 { // Compute all columns of the orthogonal matrix U or V. svd_all = u8(`A`) @@ -79,7 +79,7 @@ pub enum SVDJob as u8 { svd_none = u8(`N`) } -// GSVDJob specifies the singular vector computation type for Generalized SVD. +// GSVDJob specifies the singular vector computation type for Generalized SingularValueDecomposition. pub enum GSVDJob as u8 { // Compute orthogonal matrix U. gsvd_u = u8(`U`) @@ -93,8 +93,8 @@ pub enum GSVDJob as u8 { gsvd_none = u8(`N`) } -// EVComp specifies how eigenvectors are computed in Dsteqr. -pub enum EVComp as u8 { +// EigenVectorsComp specifies how eigenvectors are computed in Dsteqr. +pub enum EigenVectorsComp as u8 { // Compute eigenvectors of the original symmetric matrix. 
ev_orig = u8(`V`) // Compute eigenvectors of the tridiagonal matrix. @@ -103,24 +103,24 @@ pub enum EVComp as u8 { ev_comp_none = u8(`N`) } -// EVJob specifies whether eigenvectors are computed in Dsyev. -pub enum EVJob as u8 { +// EigenVectorsJob specifies whether eigenvectors are computed in Dsyev. +pub enum EigenVectorsJob as u8 { // Compute eigenvectors. ev_compute = u8(`V`) // Do not compute eigenvectors. ev_none = u8(`N`) } -// LeftEVJob specifies whether left eigenvectors are computed in Dgeev. -pub enum LeftEVJob as u8 { +// LeftEigenVectorsJob specifies whether left eigenvectors are computed in Dgeev. +pub enum LeftEigenVectorsJob as u8 { // Compute left eigenvectors. left_ev_compute = u8(`V`) // Do not compute left eigenvectors. left_ev_none = u8(`N`) } -// RightEVJob specifies whether right eigenvectors are computed in Dgeev. -pub enum RightEVJob as u8 { +// RightEigenVectorsJob specifies whether right eigenvectors are computed in Dgeev. +pub enum RightEigenVectorsJob as u8 { // Compute right eigenvectors. right_ev_compute = u8(`V`) // Do not compute right eigenvectors. @@ -159,8 +159,8 @@ pub enum UpdateSchurComp as u8 { update_schur_none = u8(`N`) } -// EVSide specifies what eigenvectors are computed in Dtrevc3. -pub enum EVSide as u8 { +// EigenVectorsSide specifies what eigenvectors are computed in Dtrevc3. +pub enum EigenVectorsSide as u8 { // Compute only right eigenvectors. ev_right = u8(`R`) // Compute only left eigenvectors. @@ -169,8 +169,8 @@ pub enum EVSide as u8 { ev_both = u8(`B`) } -// EVHowMany specifies which eigenvectors are computed in Dtrevc3 and how. -pub enum EVHowMany as u8 { +// EigenVectorsHowMany specifies which eigenvectors are computed in Dtrevc3 and how. +pub enum EigenVectorsHowMany as u8 { // Compute all right and/or left eigenvectors. ev_all = u8(`A`) // Compute all right and/or left eigenvectors multiplied by an input matrix. 
diff --git a/lapack/lapack64/dgeev.v b/lapack/lapack64/dgeev.v index 38799070c..4ed16de5b 100644 --- a/lapack/lapack64/dgeev.v +++ b/lapack/lapack64/dgeev.v @@ -4,7 +4,7 @@ import math import vsl.blas // dgeev computes the eigenvalues and, optionally, the left and/or right eigenvectors for a real nonsymmetric matrix A. -pub fn dgeev(jobvl LeftEVJob, jobvr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl int, mut vr []f64, ldvr int) int { +pub fn dgeev(jobvl LeftEigenVectorsJob, jobvr LeftEigenVectorsJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl int, mut vr []f64, ldvr int) int { if n == 0 { return 0 } diff --git a/lapack/lapack64/dgesvd.v b/lapack/lapack64/dgesvd.v index a8f035e7d..aad7e51ae 100644 --- a/lapack/lapack64/dgesvd.v +++ b/lapack/lapack64/dgesvd.v @@ -3,7 +3,7 @@ module lapack64 import math import vsl.blas -// dgesvd computes the singular value decomposition (SVD) of a real matrix A. +// dgesvd computes the singular value decomposition (SingularValueDecomposition) of a real matrix A. pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s []f64, mut u []f64, ldu int, mut vt []f64, ldvt int, superb []f64) int { if m == 0 || n == 0 { return 0 diff --git a/lapack/lapack64/dsteqr.v b/lapack/lapack64/dsteqr.v index ce61a1f63..e9b2ab7a1 100644 --- a/lapack/lapack64/dsteqr.v +++ b/lapack/lapack64/dsteqr.v @@ -17,17 +17,17 @@ import vsl.blas // dsteqr will panic otherwise. // // z, on entry, contains the n×n orthogonal matrix used in the reduction to -// tridiagonal form if compz == lapack.EVOrig. On exit, if -// compz == lapack.EVOrig, z contains the orthonormal eigenvectors of the -// original symmetric matrix, and if compz == lapack.EVTridiag, z contains the +// tridiagonal form if compz == lapack.EigenVectorsOrig. 
On exit, if +// compz == lapack.EigenVectorsOrig, z contains the orthonormal eigenvectors of the +// original symmetric matrix, and if compz == lapack.EigenVectorsTridiag, z contains the // orthonormal eigenvectors of the symmetric tridiagonal matrix. z is not used -// if compz == lapack.EVCompNone. +// if compz == lapack.EigenVectorsCompNone. // // work must have length at least max(1, 2*n-2) if the eigenvectors are computed, // and dsteqr will panic otherwise. // // dsteqr is an internal routine. It is exported for testing purposes. -pub fn dsteqr(compz EVComp, n int, mut d []f64, mut e []f64, mut z []f64, ldz int, mut work []f64) bool { +pub fn dsteqr(compz EigenVectorsComp, n int, mut d []f64, mut e []f64, mut z []f64, ldz int, mut work []f64) bool { if compz != .ev_comp_none && compz != .ev_tridiag && compz != .ev_orig { panic('bad_ev_comp') } diff --git a/lapack/lapack64/dsyev.v b/lapack/lapack64/dsyev.v index 12cf3638b..ef995f40d 100644 --- a/lapack/lapack64/dsyev.v +++ b/lapack/lapack64/dsyev.v @@ -3,7 +3,7 @@ module lapack64 import math import vsl.blas -pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, mut w []f64, mut work []f64, lwork int) { +pub fn dsyev(jobz EigenVectorsJob, uplo blas.Uplo, n int, mut a []f64, lda int, mut w []f64, mut work []f64, lwork int) { if jobz != .ev_none && jobz != .ev_compute { panic(bad_ev_job) } @@ -89,7 +89,8 @@ pub fn dsyev(jobz EVJob, uplo blas.Uplo, n int, mut a []f64, lda int, mut w []f6 } } else { dorgtr(uplo, n, mut a, lda, work[indtau..], mut work[indwork..], llwork) - if !dsteqr(EVComp(jobz), n, mut w, mut work[inde..], mut a, lda, mut work[indtau..]) { + if !dsteqr(EigenVectorsComp(jobz), n, mut w, mut work[inde..], mut a, lda, mut + work[indtau..]) { panic('Dsteqr failed') } } diff --git a/lapack/lapack64/errors.v b/lapack/lapack64/errors.v index 642ed523a..27b95002f 100644 --- a/lapack/lapack64/errors.v +++ b/lapack/lapack64/errors.v @@ -7,19 +7,19 @@ pub const bad_apply_ortho = 'lapack: bad 
ApplyOrtho' pub const bad_balance_job = 'lapack: bad BalanceJob' pub const bad_diag = 'lapack: bad Diag' pub const bad_direct = 'lapack: bad Direct' -pub const bad_ev_comp = 'lapack: bad EVComp' -pub const bad_ev_how_many = 'lapack: bad EVHowMany' -pub const bad_ev_job = 'lapack: bad EVJob' -pub const bad_ev_side = 'lapack: bad EVSide' +pub const bad_ev_comp = 'lapack: bad EigenVectorsComp' +pub const bad_ev_how_many = 'lapack: bad EigenVectorsHowMany' +pub const bad_ev_job = 'lapack: bad EigenVectorsJob' +pub const bad_ev_side = 'lapack: bad EigenVectorsSide' pub const bad_gsvd_job = 'lapack: bad GSVDJob' pub const bad_gen_ortho = 'lapack: bad GenOrtho' -pub const bad_left_ev_job = 'lapack: bad LeftEVJob' +pub const bad_left_ev_job = 'lapack: bad LeftEigenVectorsJob' pub const bad_matrix_type = 'lapack: bad MatrixType' pub const bad_maximize_norm_x_job = 'lapack: bad MaximizeNormXJob' pub const bad_norm = 'lapack: bad Norm' pub const bad_ortho_comp = 'lapack: bad OrthoComp' pub const bad_pivot = 'lapack: bad Pivot' -pub const bad_right_ev_job = 'lapack: bad RightEVJob' +pub const bad_right_ev_job = 'lapack: bad RightEigenVectorsJob' pub const bad_svd_job = 'lapack: bad SVDJob' pub const bad_schur_comp = 'lapack: bad SchurComp' pub const bad_schur_job = 'lapack: bad SchurJob' @@ -29,7 +29,7 @@ pub const bad_store_v = 'lapack: bad StoreV' pub const bad_trans = 'lapack: bad Trans' pub const bad_update_schur_comp = 'lapack: bad UpdateSchurComp' pub const bad_uplo = 'lapack: bad Uplo' -pub const both_svd_over = 'lapack: both jobU and jobVT are lapack.SVDOverwrite' +pub const both_svd_over = 'lapack: both jobU and jobVT are lapack.SVDOverwrite' // Panic strings for bad numerical and string values. 
pub const bad_ifst = 'lapack: ifst out of range' diff --git a/lapack/lapack64/ilaenv.v b/lapack/lapack64/ilaenv.v index f9661e1c2..7b1093e90 100644 --- a/lapack/lapack64/ilaenv.v +++ b/lapack/lapack64/ilaenv.v @@ -11,7 +11,7 @@ import math // crossover to an unblocked version. // 4: The number of shifts. // 5: The minimum column dimension for blocking to be used. -// 6: The crossover point for SVD (to use QR factorization or not). +// 6: The crossover point for SVD (to use QR factorization or not). // 7: The number of processors. // 8: The crossover point for multi-shift in QR and QZ methods for non-symmetric eigenvalue problems. // 9: Maximum size of the subproblems in divide-and-conquer algorithms. @@ -241,7 +241,7 @@ fn ilaenv(ispec int, name string, opts string, n1 int, n2 int, n3 int, n4 int) i return 2 } 6 { - // Used by xGELSS and xGESVD + // Used by xGELSS and xGESVD // Assuming n1 and n2 are defined elsewhere in your code // Replace `min(n1, n2)` with actual min calculation or function return int(f64(math.min(n1, n2)) * 1.6) diff --git a/lapack/lapack_d_vsl_lapack_lapacke.v b/lapack/lapack_d_vsl_lapack_lapacke.v index bd96129ae..39113c5bc 100644 --- a/lapack/lapack_d_vsl_lapack_lapacke.v +++ b/lapack/lapack_d_vsl_lapack_lapacke.v @@ -13,9 +13,9 @@ fn C.LAPACKE_dgetri(matrix_layout blas.MemoryLayout, n int, a &f64, lda int, ipi fn C.LAPACKE_dpotrf(matrix_layout blas.MemoryLayout, uplo blas.Uplo, n int, a &f64, lda int) int -fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl LeftEVJob, calc_vr LeftEVJob, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int +fn C.LAPACKE_dgeev(matrix_layout blas.MemoryLayout, calc_vl LeftEigenVectorsJob, calc_vr LeftEigenVectorsJob, n int, a &f64, lda int, wr &f64, wi &f64, vl &f64, ldvl_ int, vr &f64, ldvr_ int) int -fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz EVJob, uplo blas.Uplo, n int, a &f64, lda int, w &f64, work &f64, 
lwork int) int +fn C.LAPACKE_dsyev(matrix_layout blas.MemoryLayout, jobz EigenVectorsJob, uplo blas.Uplo, n int, a &f64, lda int, w &f64, work &f64, lwork int) int fn C.LAPACKE_dgebal(matrix_layout blas.MemoryLayout, job BalanceJob, n int, a &f64, lda int, ilo int, ihi int, scale &f64) int @@ -54,13 +54,13 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, } } -// dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. +// dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. // // See: http://www.netlib.org/lapack/explore-html/d8/d2d/dgesvd_8f.html // // See: https://software.intel.com/en-us/mkl-developer-reference-c-gesvd // -// The SVD is written +// The SVD is written // // A = U * SIGMA * transpose(V) // @@ -173,7 +173,7 @@ pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { // // The computed eigenvectors are normalized to have Euclidean norm // equal to 1 and largest component real. 
-pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { +pub fn dgeev(calc_vl LeftEigenVectorsJob, calc_vr LeftEigenVectorsJob, n int, mut a []f64, lda int, wr []f64, wi []f64, vl []f64, ldvl_ int, vr []f64, ldvr_ int) { mut vvl := 0.0 mut vvr := 0.0 mut ldvl := ldvl_ diff --git a/lapack/lapack_notd_vsl_lapack_lapacke.v b/lapack/lapack_notd_vsl_lapack_lapacke.v index e46de9120..5dc5bb6c0 100644 --- a/lapack/lapack_notd_vsl_lapack_lapacke.v +++ b/lapack/lapack_notd_vsl_lapack_lapacke.v @@ -31,13 +31,13 @@ pub fn dgesv(n int, nrhs int, mut a []f64, lda int, mut ipiv []int, mut b []f64, lapack64.dgesv(n, nrhs, mut a, lda, mut ipiv, mut b, ldb) } -// dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. +// dgesvd computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and/or right singular vectors. 
// // See: http://www.netlib.org/lapack/explore-html/d8/d2d/dgesvd_8f.html // // See: https://software.intel.com/en-us/mkl-developer-reference-c-gesvd // -// The SVD is written +// The SVD is written // // A = U * SIGMA * transpose(V) // @@ -55,7 +55,7 @@ pub fn dgesvd(jobu SVDJob, jobvt SVDJob, m int, n int, mut a []f64, lda int, s [ info := lapack64.dgesvd(jobu, jobvt, m, n, mut a, lda, s, mut u, ldu, mut vt, ldvt, superb) if info != 0 { - errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: ${info}', .efailed) } } @@ -90,7 +90,7 @@ pub fn dgetrf(m int, n int, mut a []f64, lda int, mut ipiv []int) { pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { info := lapack64.dgetri(n, mut a, lda, mut ipiv) if info != 0 { - errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: ${info}', .efailed) } } @@ -114,7 +114,7 @@ pub fn dgetri(n int, mut a []f64, lda int, mut ipiv []int) { pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { info := lapack64.dpotrf(uplo, n, mut a, lda) if info != 0 { - errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: ${info}', .efailed) } } @@ -141,7 +141,7 @@ pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) { // // The computed eigenvectors are normalized to have Euclidean norm // equal to 1 and largest component real. 
-pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl_ int, mut vr []f64, ldvr_ int) { +pub fn dgeev(calc_vl LeftEigenVectorsJob, calc_vr LeftEigenVectorsJob, n int, mut a []f64, lda int, wr []f64, wi []f64, mut vl []f64, ldvl_ int, mut vr []f64, ldvr_ int) { mut vvl := 0.0 mut vvr := 0.0 mut ldvl := ldvl_ @@ -163,6 +163,6 @@ pub fn dgeev(calc_vl LeftEVJob, calc_vr LeftEVJob, n int, mut a []f64, lda int, info := lapack64.dgeev(calc_vl, calc_vr, n, mut a, lda, wr, wi, mut vl, ldvl, mut vr, ldvr) if info != 0 { - errors.vsl_panic('LAPACK dgesvd failed with error code: $info', .efailed) + errors.vsl_panic('LAPACK dgesvd failed with error code: ${info}', .efailed) } } From 8afd14a075983fcfefa7e7f7444a5c69c6f82dd8 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 14:38:16 -0300 Subject: [PATCH 32/33] refactor: Update dpotrf function to use named constant for uplo parameter --- lapack/lapack64/dpotf2.v | 71 ++++++++++++++++++++++++++++++++++++++++ lapack/lapack64/dpotrf.v | 69 ++++++++++++++++++++++++++++---------- 2 files changed, 122 insertions(+), 18 deletions(-) create mode 100644 lapack/lapack64/dpotf2.v diff --git a/lapack/lapack64/dpotf2.v b/lapack/lapack64/dpotf2.v new file mode 100644 index 000000000..22640a366 --- /dev/null +++ b/lapack/lapack64/dpotf2.v @@ -0,0 +1,71 @@ +module lapack64 + +import math +import vsl.blas + +// dpotf2 computes the Cholesky decomposition of the symmetric positive definite +// matrix a. If ul == .upper, then a is stored as an upper-triangular matrix, +// and a = Uᵀ U is stored in place into a. If ul == .lower, then a = L Lᵀ +// is computed and stored in-place into a. If a is not positive definite, false +// is returned. This is the unblocked version of the algorithm. +// +// dpotf2 is an internal routine. It is exported for testing purposes. 
+pub fn dpotf2(ul blas.Uplo, n int, mut a []f64, lda int) bool { + if ul != .upper && ul != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + // Quick return if possible. + if n == 0 { + return true + } + + if a.len < (n - 1) * lda + n { + panic(short_a) + } + + if ul == .upper { + for j := 0; j < n; j++ { + mut ajj := a[j * lda + j] + if j != 0 { + ajj -= blas.ddot(j, a[j..], lda, a[j..], lda) + } + if ajj <= 0 || math.is_nan(ajj) { + a[j * lda + j] = ajj + return false + } + ajj = math.sqrt(ajj) + a[j * lda + j] = ajj + if j < n - 1 { + blas.dgemv(.trans, j, n - j - 1, -1, a[j + 1..], lda, a[j..], lda, 1, mut + a[j * lda + j + 1..], 1) + blas.dscal(n - j - 1, 1 / ajj, mut a[j * lda + j + 1..], 1) + } + } + return true + } + for j := 0; j < n; j++ { + mut ajj := a[j * lda + j] + if j != 0 { + ajj -= blas.ddot(j, a[j * lda..], 1, a[j * lda..], 1) + } + if ajj <= 0 || math.is_nan(ajj) { + a[j * lda + j] = ajj + return false + } + ajj = math.sqrt(ajj) + a[j * lda + j] = ajj + if j < n - 1 { + blas.dgemv(.no_trans, n - j - 1, j, -1, a[(j + 1) * lda..], lda, a[j * lda..], + 1, 1, mut a[(j + 1) * lda + j..], lda) + blas.dscal(n - j - 1, 1 / ajj, mut a[(j + 1) * lda + j..], lda) + } + } + return true +} diff --git a/lapack/lapack64/dpotrf.v b/lapack/lapack64/dpotrf.v index b4d307809..d8e1c6e45 100644 --- a/lapack/lapack64/dpotrf.v +++ b/lapack/lapack64/dpotrf.v @@ -3,31 +3,64 @@ module lapack64 import math import vsl.blas -// dpotrf computes the Cholesky factorization of a real symmetric positive definite matrix A. -pub fn dpotrf(uplo blas.Uplo, n int, mut a []f64, lda int) int { +pub fn dpotrf(ul blas.Uplo, n int, mut a []f64, lda int) bool { + if ul != .upper && ul != .lower { + panic(bad_uplo) + } + if n < 0 { + panic(n_lt0) + } + if lda < math.max(1, n) { + panic(bad_ld_a) + } + + // Quick return if possible. 
if n == 0 { - return 0 + return true } - mut info := 0 - if uplo != .upper && uplo != .lower { - info = -1 - } else if n < 0 { - info = -2 - } else if lda < math.max(1, n) { - info = -4 + if a.len < (n - 1) * lda + n { + panic(short_a) } - if info != 0 { - return info + nb := ilaenv(1, 'DPOTRF', ul.str(), n, -1, -1, -1) + if nb <= 1 || n <= nb { + return dpotf2(ul, n, mut a, lda) } - // Quick return if possible - if n == 0 { - return 0 + if ul == .upper { + for j := 0; j < n; j += nb { + jb := math.min(nb, n - j) + blas.dsyrk(.upper, .trans, jb, j, -1, a[j..], lda, 1, mut a[j * lda + j..], + lda) + ok := dpotf2(.upper, jb, mut a[j * lda + j..], lda) + if !ok { + return false + } + if j + jb < n { + blas.dgemm(.trans, .no_trans, jb, n - j - jb, j, -1, a[j..], lda, a[j + jb..], + lda, 1, mut a[j * lda + j + jb..], lda) + blas.dtrsm(.left, .upper, .trans, .non_unit, jb, n - j - jb, 1, a[j * lda + j..], + lda, mut a[j * lda + j + jb..], lda) + } + } + return true } - // Placeholder for the actual LAPACK function calls - // Example: info = dpotrf(uplo, n, a, lda, work, lwork) - return info + for j := 0; j < n; j += nb { + jb := math.min(nb, n - j) + blas.dsyrk(.lower, .no_trans, jb, j, -1, a[j * lda..], lda, 1, mut a[j * lda + j..], + lda) + ok := dpotf2(.lower, jb, mut a[j * lda + j..], lda) + if !ok { + return false + } + if j + jb < n { + blas.dgemm(.no_trans, .trans, n - j - jb, jb, j, -1, a[(j + jb) * lda..], + lda, a[j * lda..], lda, 1, mut a[(j + jb) * lda + j..], lda) + blas.dtrsm(.right, .lower, .trans, .non_unit, n - j - jb, jb, 1, a[j * lda + j..], + lda, mut a[(j + jb) * lda + j..], lda) + } + } + return true } From 3af89bf4fa1a16684646f7da6c36100cb8a3c313 Mon Sep 17 00:00:00 2001 From: ulises-jeremias Date: Sun, 23 Jun 2024 14:49:16 -0300 Subject: [PATCH 33/33] refactor: Update BLAS and LAPACK functions to use named constants and handle errors --- blas/oblas_notd_vsl_blas_cblas.v | 87 ++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff 
--git a/blas/oblas_notd_vsl_blas_cblas.v b/blas/oblas_notd_vsl_blas_cblas.v index f05117128..53602851b 100644 --- a/blas/oblas_notd_vsl_blas_cblas.v +++ b/blas/oblas_notd_vsl_blas_cblas.v @@ -6,77 +6,164 @@ import vsl.blas.blas64 @[inline] pub fn set_num_threads(n int) {} +// ddot computes the dot product of two vectors. @[inline] pub fn ddot(n int, x []f64, incx int, y []f64, incy int) f64 { return blas64.ddot(n, x, incx, y, incy) } +// dasum computes the sum of the absolute values of elements in a vector. @[inline] pub fn dasum(n int, x []f64, incx int) f64 { return blas64.dasum(n, x, incx) } +// dnrm2 computes the Euclidean norm of a vector. @[inline] pub fn dnrm2(n int, x []f64, incx int) f64 { return blas64.dnrm2(n, x, incx) } +// daxpy computes y := alpha * x + y. @[inline] pub fn daxpy(n int, alpha f64, x []f64, incx int, mut y []f64, incy int) { blas64.daxpy(n, alpha, x, incx, mut y, incy) } +// dcopy copies a vector x to a vector y. @[inline] pub fn dcopy(n int, x []f64, incx int, mut y []f64, incy int) { blas64.dcopy(n, x, incx, mut y, incy) } +// dswap swaps the elements of two vectors. @[inline] pub fn dswap(n int, mut x []f64, incx int, mut y []f64, incy int) { blas64.dswap(n, mut x, incx, mut y, incy) } +// drot applies a plane rotation to points in the plane. @[inline] pub fn drot(n int, mut x []f64, incx int, mut y []f64, incy int, c f64, s f64) { blas64.drot(n, mut x, incx, mut y, incy, c, s) } +// dscal scales a vector by a constant. @[inline] pub fn dscal(n int, alpha f64, mut x []f64, incx int) { blas64.dscal(n, alpha, mut x, incx) } +// idamax finds the index of the element with the maximum absolute value. +@[inline] +pub fn idamax(n int, x []f64, incx int) int { + return blas64.idamax(n, x, incx) +} + +// dgemv performs matrix-vector multiplication. 
@[inline] pub fn dgemv(trans Transpose, m int, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { blas64.dgemv(trans, m, n, alpha, a, lda, x, incx, beta, mut y, incy) } +// dger performs the rank-1 update of a matrix. @[inline] pub fn dger(m int, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { blas64.dger(m, n, alpha, x, incx, y, incy, mut a, lda) } +// dtrsv solves a system of linear equations with a triangular matrix. @[inline] pub fn dtrsv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { blas64.dtrsv(uplo, trans_a, diag, n, a, lda, mut x, incx) } +// dtrmv performs matrix-vector operations using a triangular matrix. @[inline] pub fn dtrmv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, a []f64, lda int, mut x []f64, incx int) { blas64.dtrmv(uplo, trans_a, diag, n, a, lda, mut x, incx) } +// dsyr performs a symmetric rank-1 update of a matrix. @[inline] pub fn dsyr(uplo Uplo, n int, alpha f64, x []f64, incx int, mut a []f64, lda int) { blas64.dsyr(uplo, n, alpha, x, incx, mut a, lda) } +// dsyr2 performs a symmetric rank-2 update of a matrix. @[inline] pub fn dsyr2(uplo Uplo, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut a []f64, lda int) { blas64.dsyr2(uplo, n, alpha, x, incx, y, incy, mut a, lda) } +// dgemm performs matrix-matrix multiplication. @[inline] pub fn dgemm(trans_a Transpose, trans_b Transpose, m int, n int, k int, alpha f64, a []f64, lda int, b []f64, ldb int, beta f64, mut cc []f64, ldc int) { blas64.dgemm(trans_a, trans_b, m, n, k, alpha, a, lda, b, ldb, beta, mut cc, ldc) } + +// dgbmv performs a matrix-vector multiplication with a band matrix. 
+@[inline] +pub fn dgbmv(trans_a Transpose, m int, n int, kl int, ku int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + blas64.dgbmv(trans_a, m, n, kl, ku, alpha, a, lda, x, incx, beta, mut y, incy) +} + +// dsymv performs a matrix-vector multiplication for a symmetric matrix. +@[inline] +pub fn dsymv(uplo Uplo, n int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + blas64.dsymv(uplo, n, alpha, a, lda, x, incx, beta, mut y, incy) +} + +// dsbmv performs a matrix-vector multiplication with a symmetric band matrix. +@[inline] +pub fn dsbmv(uplo Uplo, n int, k int, alpha f64, a []f64, lda int, x []f64, incx int, beta f64, mut y []f64, incy int) { + blas64.dsbmv(uplo, n, k, alpha, a, lda, x, incx, beta, mut y, incy) +} + +// dtbmv performs a matrix-vector multiplication with a triangular band matrix. +@[inline] +pub fn dtbmv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtbmv(uplo, trans_a, diag, n, k, a, lda, mut x, incx) +} + +// dtbsv solves a system of linear equations with a triangular band matrix. +@[inline] +pub fn dtbsv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, k int, a []f64, lda int, mut x []f64, incx int) { + blas64.dtbsv(uplo, trans_a, diag, n, k, a, lda, mut x, incx) +} + +// dtpmv performs a matrix-vector multiplication with a triangular packed matrix. +@[inline] +pub fn dtpmv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + blas64.dtpmv(uplo, trans_a, diag, n, ap, mut x, incx) +} + +// dtpsv solves a system of linear equations with a triangular packed matrix. +@[inline] +pub fn dtpsv(uplo Uplo, trans_a Transpose, diag Diagonal, n int, ap []f64, mut x []f64, incx int) { + blas64.dtpsv(uplo, trans_a, diag, n, ap, mut x, incx) +} + +// dspmv performs a matrix-vector multiplication with a symmetric packed matrix. 
+@[inline] +pub fn dspmv(uplo Uplo, n int, alpha f64, ap []f64, x []f64, incx int, beta f64, mut y []f64, incy int) { + blas64.dspmv(uplo, n, alpha, ap, x, incx, beta, mut y, incy) +} + +// dspr performs a symmetric rank-1 update for a packed matrix. +@[inline] +pub fn dspr(uplo Uplo, n int, alpha f64, x []f64, incx int, mut ap []f64) { + blas64.dspr(uplo, n, alpha, x, incx, mut ap) +} + +// dspr2 performs a symmetric rank-2 update for a packed matrix. +@[inline] +pub fn dspr2(uplo Uplo, n int, alpha f64, x []f64, incx int, y []f64, incy int, mut ap []f64) { + blas64.dspr2(uplo, n, alpha, x, incx, y, incy, mut ap) +} + +// dsyrk performs a symmetric rank-k update. +@[inline] +pub fn dsyrk(uplo Uplo, trans_a Transpose, n int, k int, alpha f64, a []f64, lda int, beta f64, mut c []f64, ldc int) { + blas64.dsyrk(uplo, trans_a, n, k, alpha, a, lda, beta, mut c, ldc) +}