! ddpcm_lib.f90 (forked from mnottoli/ddPCM)
module ddpcm_lib
use ddcosmo, only: nbasis, nsph, ngrid, ncav, lmax, iconv, iprint, &
& wghpot, intrhs, prtsph, zero, pt5, one, two, four, pi, basis, &
& eps, csph, rsph, grid, w, ui, ndiis, sprod, ylmbas
use pcm_fmm
!use ddcosmo
implicit none
real*8, allocatable :: rx_prc(:,:,:)
real*8, allocatable :: rhs(:,:), phieps(:,:), xs(:,:)
real*8, allocatable :: g(:,:)
logical :: dodiag
! Measure time
real*8 :: start_time, finish_time
! FMM-related global variables. I myself avoid global variables, but this
! program is written in this non-portable style. It would take a long time
! to change the entire program, so I just follow the existing style for now.
! Maximum degree of spherical harmonics for M (multipole) expansion
integer :: pm
! Maximum degree of spherical harmonics for L (local) expansion
integer :: pl
! Scalar factors for M2M, M2L and L2L operations
real*8, allocatable :: vscales(:)
!! Cluster tree information
! Reordering of spheres for better locality
integer, allocatable :: ind(:)
! First and last spheres belonging to a given node of the cluster tree
integer, allocatable :: cluster(:, :)
! Children of each cluster
integer, allocatable :: children(:, :)
! Parent of each cluster
integer, allocatable :: parent(:)
! Center of bounding sphere of each cluster
real*8, allocatable :: cnode(:, :)
! Radius of bounding sphere of each cluster
real*8, allocatable :: rnode(:)
! Leaf node that contains only the given input sphere
integer, allocatable :: snode(:)
! Number of nodes (subclusters)
integer :: nclusters
! Height of the tree
integer :: height
! The same data for the improved tree
integer, allocatable :: cluster2(:, :), children2(:, :), parent2(:)
real*8, allocatable :: cnode2(:, :), rnode2(:)
! Total number of far and near admissible pairs
integer :: nnfar, nnnear
! Number of admissible pairs for each node
integer, allocatable :: nfar(:), nnear(:)
! Arrays of admissible far and near pairs
integer, allocatable :: far(:), near(:)
! Index of the first element in the arrays of admissible far and near pairs
integer, allocatable :: sfar(:), snear(:)
! Temporary variables used to build the list of all admissible pairs
integer :: lwork, iwork, jwork
integer, allocatable :: work(:, :)
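! Size note (illustrative): with nclusters = 2*nsph-1 as set in ddpcm_fmm
! below, a system of nsph = 4 spheres yields a full binary tree of 7 nodes,
! whose 4 leaves each hold a single sphere (tracked by snode).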
contains
subroutine ddpcm(phi, psi, esolv)
! Main ddPCM driver: given the potential at the exposed cavity
! points and the psi vector, it computes the solvation energy
implicit none
real*8, intent(in) :: phi(ncav), psi(nbasis,nsph)
real*8, intent(inout) :: esolv
real*8 :: tol
integer :: isph, n_iter
logical :: ok
external :: lx, ldm1x, hnorm
allocate(rx_prc(nbasis,nbasis,nsph))
allocate(rhs(nbasis,nsph),phieps(nbasis,nsph),xs(nbasis,nsph))
allocate(g(ngrid,nsph))
tol = 10.0d0**(-iconv)
! build the preconditioner
call mkprec
! build the RHS
!write(6,*) 'pot', ncav
!do isph = 1, ncav
! write(6,*) phi(isph)
!end do
g = zero
xs = zero
call wghpot(phi,g)
do isph = 1, nsph
call intrhs(isph,g(:,isph),xs(:,isph))
end do
!call prtsph('phi',nsph,0,xs)
!call prtsph('psi',nsph,0,psi)
! apply Rinf to build the ddPCM right-hand side
dodiag = .true.
call rinfx(nbasis*nsph,xs,rhs)
call prtsph('rhs',nsph,0,rhs)
! solve the ddpcm linear system
n_iter = 200
dodiag = .false.
call cpu_time(start_time)
call jacobi_diis(nsph*nbasis,iprint,ndiis,4,tol,rhs,phieps,n_iter, &
& ok,rx,apply_rx_prec,hnorm)
call cpu_time(finish_time)
write(6,*) 'ddpcm solve time: ', finish_time-start_time
!call prtsph('phie',nsph,0,phieps)
! solve the ddcosmo linear system
n_iter = 200
dodiag = .false.
call jacobi_diis(nsph*nbasis,iprint,ndiis,4,tol,phieps,xs,n_iter, &
& ok,lx,ldm1x,hnorm)
!call prtsph('x',nsph,0,xs)
! compute the energy: esolv = 1/2 * < X, psi >
esolv = pt5*sprod(nsph*nbasis,xs,psi)
return
end subroutine ddpcm
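! A minimal usage sketch in comment form. The setup call and argument
! preparation are assumptions about the surrounding ddCOSMO code, not part
! of this file; 'ddinit' is a hypothetical name here.
!   call ddinit(n, x, y, z, rvdw)        ! hypothetical geometry/cavity setup
!   ! ... fill phi(ncav) with the solute potential at the exposed points ...
!   ! ... fill psi(nbasis,nsph) with the solute source coefficients ...
!   call ddpcm(phi, psi, esolv)
!   write(6,*) 'solvation energy:', esolv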
subroutine ddpcm_fmm(phi, psi, esolv)
! FMM-accelerated ddPCM driver: given the potential at the exposed cavity
! points and the psi vector, it computes the solvation energy
implicit none
real*8, intent(in) :: phi(ncav), psi(nbasis,nsph)
real*8, intent(inout) :: esolv
real*8 :: tol
integer :: isph, n_iter
logical :: ok
external :: lx, ldm1x, hnorm
! Prepare the FMM tree and related data. This setup could be moved out of
! this function so that ddpcm_fmm can be rerun on the same molecule without
! recomputing the FMM-related variables.
nclusters = 2*nsph-1
allocate(ind(nsph), cluster(2, nclusters), children(2, nclusters), &
parent(nclusters), cnode(3, nclusters), rnode(nclusters), snode(nsph))
pm = lmax ! This must be adjusted to reach the required FMM accuracy
pl = lmax ! This must be adjusted to reach the required FMM accuracy
allocate(vscales((pm+pl+1)*(pm+pl+1)))
! Init order of spheres
do isph = 1, nsph
ind(isph) = isph
end do
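! ind starts as the identity permutation; per its declaration comment above,
! the tree construction below is expected to reorder it for locality.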
! Init constants vscales
call init_globals(pm, pl, vscales, ngrid, w, grid, basis)
! Build initial binary tree
call btree1_init(nsph, csph, rsph, ind, cluster, children, parent, &
cnode, rnode, snode)
nclusters = 2*nsph-1
! Get its height
call tree_get_height(nclusters, parent, height)
! Compute size of improved tree (to allocate space)
call tree_improve_get_size(2*nsph-1, children, height, nclusters)
! Allocate memory for improved tree
allocate(cluster2(2, nclusters), children2(2, nclusters), parent2(nclusters))
allocate(cnode2(3, nclusters), rnode2(nclusters))
! Build improved tree
call tree_improve(2*nsph-1, nclusters, height, cluster, children, parent, &
cnode, rnode, cluster2, children2, parent2, cnode2, rnode2)
! Allocate space to find all admissible pairs
allocate(nfar(nclusters), nnear(nclusters))
! Try to find all admissible far and near pairs of tree nodes
lwork = nclusters*100 ! Heuristic size; increase it if the check below fails
allocate(work(3, lwork))
iwork = 0 ! initialize to zero for the first call to tree_get_farnear_work
call tree_get_farnear_work(nclusters, children2, cnode2, rnode2, lwork, &
iwork, jwork, work, nnfar, nfar, nnnear, nnear)
! If the work array was too small, stop and ask for a larger one. Note that
! tree_get_farnear_work reuses the previously computed work array, so calling
! it again with a larger lwork would not redo work already done.
if (iwork .ne. jwork+1) then
write(*,*) 'Please increase lwork in subroutine ddpcm_fmm of ddpcm_lib.f90'
stop
end if
! Allocate arrays for admissible far and near pairs
allocate(far(nnfar), near(nnnear), sfar(nclusters+1), snear(nclusters+1))
! Get list of admissible pairs from temporary work array
call tree_get_farnear(jwork, lwork, work, nclusters, nnfar, nfar, sfar, &
far, nnnear, nnear, snear, near)
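! Presumably (following the sfar/snear declaration comments above) the lists
! use offset-based storage: the far-admissible partners of node i would be
! far(sfar(i):sfar(i+1)-1), and likewise near(snear(i):snear(i+1)-1).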
! Continue with ddpcm
allocate(rx_prc(nbasis,nbasis,nsph))
allocate(rhs(nbasis,nsph),phieps(nbasis,nsph),xs(nbasis,nsph))
allocate(g(ngrid,nsph))
tol = 10.0d0**(-iconv)
! build the preconditioner
call mkprec
! build the RHS
!write(6,*) 'pot', ncav
!do isph = 1, ncav
! write(6,*) phi(isph)
!end do
g = zero
xs = zero
call wghpot(phi,g)
do isph = 1, nsph
call intrhs(isph,g(:,isph),xs(:,isph))
end do
!call prtsph('phi',nsph,0,xs)
!call prtsph('psi',nsph,0,psi)
! apply Rinf (via FMM) to build the ddPCM right-hand side
dodiag = .true.
call rinfx_fmm(nbasis*nsph,xs,rhs)
call prtsph('rhs',nsph,0,rhs)
! solve the ddpcm linear system
n_iter = 200
dodiag = .false.
call cpu_time(start_time)
call jacobi_diis(nsph*nbasis,iprint,ndiis,4,tol,rhs,phieps,n_iter, &
& ok,rx_fmm,apply_rx_prec,hnorm)
call cpu_time(finish_time)
write(6,*) 'ddpcm_fmm solve time: ', finish_time-start_time
!call prtsph('phie',nsph,0,phieps)
! solve the ddcosmo linear system
n_iter = 200
dodiag = .false.
call jacobi_diis(nsph*nbasis,iprint,ndiis,4,tol,phieps,xs,n_iter, &
& ok,lx,ldm1x,hnorm)
!call prtsph('x',nsph,0,xs)
! compute the energy: esolv = 1/2 * < X, psi >
esolv = pt5*sprod(nsph*nbasis,xs,psi)
! Deallocate FMM-related arrays
deallocate(ind, cluster, children, parent, cnode, rnode, snode)
deallocate(vscales)
deallocate(cluster2, children2, parent2)
deallocate(cnode2, rnode2)
deallocate(nfar, nnear)
deallocate(work)
deallocate(far, sfar, near, snear)
return
end subroutine ddpcm_fmm
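! As noted at the top of ddpcm_fmm, the tree setup could be factored out so
! that repeated solves on the same molecule reuse it. A sketch with
! hypothetical routine names (not present in this file):
!   call ddpcm_fmm_init()                 ! build tree, vscales, pair lists
!   call ddpcm_fmm_solve(phi, psi, esolv) ! reusable solve
!   call ddpcm_fmm_finalize()             ! deallocate FMM data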
subroutine mkprec
! Assemble the diagonal blocks of the Reps matrix,
! then invert them to build the block-diagonal preconditioner
implicit none
integer :: isph, lm, ind, l1, m1, ind1, its, istatus
real*8 :: f, f1
integer, allocatable :: ipiv(:)
real*8, allocatable :: work(:)
allocate(ipiv(nbasis),work(nbasis*nbasis),stat=istatus)
if (istatus.ne.0) then
write(*,*) 'mkprec : allocation failed !'
stop
endif
rx_prc(:,:,:) = zero
! off-diagonal (surface-integral) part of each diagonal block
do isph = 1, nsph
do its = 1, ngrid
f = two*pi*ui(its,isph)*w(its)
do l1 = 0, lmax
ind1 = l1*l1 + l1 + 1
do m1 = -l1, l1
f1 = f*basis(ind1 + m1,its)/(two*dble(l1) + one)
do lm = 1, nbasis
rx_prc(lm,ind1 + m1,isph) = rx_prc(lm,ind1 + m1,isph) + &
& f1*basis(lm,its)
end do
end do
end do
end do
end do
! add the diagonal shift 2*pi*(eps + 1)/(eps - 1)
f = two*pi*(eps + one)/(eps - one)
do isph = 1, nsph
do lm = 1, nbasis
rx_prc(lm,lm,isph) = rx_prc(lm,lm,isph) + f
end do
end do
! invert the blocks
do isph = 1, nsph
call DGETRF(nbasis,nbasis,rx_prc(:,:,isph),nbasis,ipiv,istatus)
if (istatus.ne.0) then
write(6,*) 'LU failed in mkprec'
stop
end if
call DGETRI(nbasis,rx_prc(:,:,isph),nbasis,ipiv,work, &
& nbasis*nbasis,istatus)
if (istatus.ne.0) then
write(6,*) 'Inversion failed in mkprec'
stop
end if
end do
deallocate (work,ipiv,stat=istatus)
if (istatus.ne.0) then
write(*,*) 'mkprec : deallocation failed !'
stop
end if
return
end subroutine mkprec
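! Each block rx_prc(:,:,isph) now holds the inverse of the corresponding
! diagonal block of Reps; apply_rx_prec below applies these inverses
! block-by-block, one dgemm per sphere.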
subroutine rx(n,x,y)
! Computes Y = Reps X =
! = (2*pi*(eps + 1)/(eps - 1) - D) X
implicit none
integer, intent(in) :: n
real*8, intent(in) :: x(nbasis,nsph)
real*8, intent(inout) :: y(nbasis,nsph)
real*8 :: fac
call dx(n,x,y)
y = -y
if (dodiag) then
fac = two*pi*(eps + one)/(eps - one)
y = y + fac*x
end if
end subroutine rx
subroutine rx_fmm(n, x, y)
! Use FMM to compute Y = Reps X =
! = (2*pi*(eps + 1)/(eps - 1) - D) X
implicit none
integer, intent(in) :: n
real*8, intent(in) :: x(nbasis,nsph)
real*8, intent(inout) :: y(nbasis,nsph)
real*8 :: fac
call dx_fmm(n,x,y)
y = -y
if (dodiag) then
fac = two*pi*(eps + one)/(eps - one)
y = y + fac*x
end if
end subroutine rx_fmm
subroutine rinfx(n,x,y)
! Computes Y = Rinf X =
! = (2*pi - D) X
implicit none
integer, intent(in) :: n
real*8, intent(in) :: x(nbasis,nsph)
real*8, intent(inout) :: y(nbasis,nsph)
real*8 :: fac
call dx(n,x,y)
y = -y
if (dodiag) then
fac = two*pi
y = y + fac*x
end if
end subroutine rinfx
subroutine rinfx_fmm(n,x,y)
! Computes Y = Rinf X =
! = (2*pi - D) X
implicit none
integer, intent(in) :: n
real*8, intent(in) :: x(nbasis,nsph)
real*8, intent(inout) :: y(nbasis,nsph)
real*8 :: fac
call dx_fmm(n,x,y)
y = -y
if (dodiag) then
fac = two*pi
y = y + fac*x
end if
end subroutine rinfx_fmm
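! Note: rx/rx_fmm and rinfx/rinfx_fmm differ only in the diagonal factor
! (2*pi*(eps + 1)/(eps - 1) for Reps versus 2*pi for Rinf); the off-diagonal
! action is the same dx/dx_fmm matrix-vector product.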
subroutine dx(n,x,y)
! Computes Y = D X
implicit none
integer, intent(in) :: n
real*8, intent(in) :: x(nbasis,nsph)
real*8, intent(inout) :: y(nbasis,nsph)
real*8, allocatable :: vts(:), vplm(:), basloc(:), vcos(:), vsin(:)
real*8 :: c(3), vij(3), sij(3)
real*8 :: vvij, tij, fourpi, tt, f, f1
integer :: its, isph, jsph, l, m, ind, lm, istatus
allocate(vts(ngrid),vplm(nbasis),basloc(nbasis),vcos(lmax + 1), &
& vsin(lmax + 1),stat=istatus)
if (istatus.ne.0) then
write(6,*) 'dx: allocation failed !'
stop
end if
y = zero
fourpi = four*pi
! this loop is easily parallelizable
do isph = 1, nsph
! compute the "potential" from the other spheres
! at the exposed Lebedev points of the i-th sphere
vts = zero
do its = 1, ngrid
if (ui(its,isph).gt.zero) then
c = csph(:,isph) + rsph(isph)*grid(:,its)
do jsph = 1, nsph
if (jsph.ne.isph) then
! build the geometrical variables
vij = c - csph(:,jsph)
vvij = sqrt(dot_product(vij,vij))
tij = vvij/rsph(jsph)
sij = vij/vvij
! build the local basis
call ylmbas(sij,basloc,vplm,vcos,vsin)
! with the local basis in hand, accumulate the "potential" at the point:
! sum over l,m of 4*pi*l/(2*l+1) * tij**(-(l+1)) * x(ind+m,jsph) * Y_lm(sij)
tt = one/tij
do l = 0, lmax
ind = l*l + l + 1
f = fourpi*dble(l)/(two*dble(l) + one)*tt
do m = -l, l
vts(its) = vts(its) + f*x(ind + m,jsph) * &
& basloc(ind + m)
end do
tt = tt/tij
end do
else if (dodiag) then
! add the diagonal contribution
do l = 0, lmax
ind = l*l + l + 1
f = (two*dble(l) + one)/fourpi
do m = -l, l
vts(its) = vts(its) - pt5*x(ind + m,isph) * &
& basis(ind + m,its)/f
end do
end do
end if
end do
vts(its) = ui(its,isph)*vts(its)
end if
end do
! now integrate the potential to get its modal representation
call intrhs(isph,vts,y(:,isph))
end do
deallocate(vts,vplm,basloc,vcos,vsin,stat=istatus)
if (istatus.ne.0) then
write(6,*) 'dx: deallocation failed !'
stop
end if
end subroutine dx
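! dx_fmm below computes the same matrix-vector product as dx, but through the
! FMM tree built in ddpcm_fmm; the two should agree up to the truncation
! degrees pm and pl chosen there.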
subroutine dx_fmm(n,x,y)
! Computes Y = D X with the help of the FMM
implicit none
integer, intent(in) :: n
real*8, intent(in) :: x(nbasis,nsph)
real*8, intent(inout) :: y(nbasis,nsph)
real*8 :: fmm_x((pm+1)*(pm+1), nsph) ! Input for FMM
real*8 :: fmm_y((pl+1)*(pl+1), nsph) ! Output of FMM
! Copy the input coefficients, padding with zeros up to degree pm
! (the padding is a no-op when pm == lmax, as currently set in ddpcm_fmm)
fmm_x(1:nbasis, :) = x
fmm_x(nbasis+1:, :) = zero
! Do actual FMM matvec
call pcm_matvec_grid_fmm_fast(nsph, csph, rsph, ngrid, grid, w, basis, ui, &
pm, pl, vscales, ind, nclusters, cluster2, children2, cnode2, rnode2, &
nnfar, sfar, far, nnnear, snear, near, fmm_x, fmm_y)
! Truncate fmm_y (degree pl) to fit into the output y (degree lmax)
y = fmm_y(1:nbasis, :)
!write(*,*) 'I did actual FMM matvec'
end subroutine dx_fmm
subroutine apply_rx_prec(n,x,y)
! apply the block diagonal preconditioner
implicit none
integer, intent(in) :: n
real*8, intent(in) :: x(nbasis,nsph)
real*8, intent(inout) :: y(nbasis,nsph)
integer :: isph
! simply do a matrix-vector product with the stored preconditioner
! !!$omp parallel do default(shared) schedule(dynamic) &
! !!$omp private(isph)
do isph = 1, nsph
call dgemm('n','n',nbasis,1,nbasis,one,rx_prc(:,:,isph),nbasis, &
& x(:,isph),nbasis,zero,y(:,isph),nbasis)
end do
end subroutine apply_rx_prec
end module ddpcm_lib