-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparallel.f90
9771 lines (9770 loc) · 329 KB
/
parallel.f90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
module parallel_mod
!
#include <mpi_defs.h>
#define PROFILE_ON
!
#ifdef DEBUG_ON
!!!#define DEBUG_PARALLEL
#endif
!
!.. Use Statements ..
use module_kind_types, global_debug_timer => debug_timer
use eqn_idx, only : nq
use geovar, only : face_t
use geovar, only : fp_t
!
#ifdef METIS_5
use metis5_mod
#else
use intrinsic :: iso_fortran_env, only : idx_t => int32
use intrinsic :: iso_fortran_env, only : real_t => real32
#endif
!
implicit none
!
private
!
!.. Public Module Procedures ..
!
public :: create_serial_cell_map
public :: partition_grid
!public :: collect_global_solpts
public :: collect_global_solution
!public :: collect_global_variable
public :: wait_faceusp
public :: exchange_faceusp
public :: wait_facedusp
public :: exchange_facedusp
public :: exch_edge_and_node_solutions
public :: parallel_memory_usage
public :: exch_connectivity
!public :: exchange_modes
!
! NR : set in partition_grid to size(xyz_nodes,dim=1), i.e. the extent of
!      the first dimension of the global node coordinate array
!      NOTE(review): presumably the number of spatial dimensions - confirm
!
integer, save :: nr
!
! Number of communication partners
!
integer, save :: ncomm
!
! Request and status arrays for non-blocking exchanges
! NOTE(review): the fusp_* / fdusp_* prefixes presumably pair with the
!               exch_fusp / exch_fdusp exchange arrays declared further
!               down in this module - confirm against the exchange routines
!
_MPI_REQUEST_TYPE_, save, allocatable :: fusp_rqst(:)
_MPI_REQUEST_TYPE_, save, allocatable :: fdusp_rqst(:)
!
_MPI_STATUS_TYPE_, save, allocatable :: fusp_istat(:,:)
_MPI_STATUS_TYPE_, save, allocatable :: fdusp_istat(:,:)
!
! Exchange buffers (separate send and receive buffers for each exchange)
!
real(r8), save, allocatable, dimension(:) :: fusp_send_buffer
real(r8), save, allocatable, dimension(:) :: fusp_recv_buffer
real(r8), save, allocatable, dimension(:) :: fdusp_send_buffer
real(r8), save, allocatable, dimension(:) :: fdusp_recv_buffer
!
! N_NODE_EDGE_PTS : not set anywhere in the visible part of this module
! NOTE(review): presumably the number of node and edge points involved in
!               the node/edge solution exchange - confirm where it is set
!
integer, save :: n_node_edge_pts
!
! MPI datatype for collecting the global solution on all processors
!
_MPI_DATA_TYPE_, save, allocatable, dimension(:) :: recv_sol
!
!
!
type :: flux_pts_t
! FLUX_PTS_T : wrapper around an allocatable integer array so that an array
!              of this type can hold index lists of differing lengths
! pts : list of integer point indices
!       NOTE(review): presumably flux point indices (partition_grid stores
!                     an array of this type in its local "flx") - confirm
integer, allocatable :: pts(:)
end type flux_pts_t
!
! ######################################################################
! ##### BOUNDARY NODE/EDGE DERIVED TYPES AND ASSOCIATED ARRAYS #####
! ######################################################################
!
! BND_DATA_T : derived type to compactly store the local indices of either
! the nodes or edges of a partition that are located on a
! partition boundary
!
type :: bnd_data_t
! NOTE: This type is used for the bnd_nodes, bnd_edges, and bnd_faces module
!       arrays, so "node or edge" below also applies to faces for the idx
!       and my_responsibility components.
! NOTE: idx and ave_correction have no default initialization and must be
!       assigned before they are used.
!
! idx : local index of the node or edge on a partition boundary
integer :: idx
! ave_correction : the averaging coefficient used to compute the local node
! or edge point values is based on the number of cells on
! the local partition that contribute to this node/edge.
! If the number of local cells containing this node/edge
! is lc, these values are locally averaged using the
! coefficient (1/lc). If the number of global cells
! containing this node/edge is gc, this averaging
! correction is simply multiplying these node/edge
! values by (lc/gc), resulting in the final averaging
! coefficient of (1/gc) for this node/edge.
real(wp) :: ave_correction
! my_responsibility : logical flag that indicates if the node or edge point
! indices for the subcell connectivities involving
! this node or edge are the responsibility of this
! processor (defaults to false)
logical(lk) :: my_responsibility = fals
end type bnd_data_t
!
! BND_NODES : Array of type bnd_data_t to store the local index and
! averaging correction for all nodes of the partition that
! are located on a partition boundary
!
type(bnd_data_t), save, allocatable, dimension(:) :: bnd_nodes
!
! BND_EDGES : Array of type bnd_data_t to store the local index and
! averaging correction for all edges of the partition that
! are located on a partition boundary
!
type(bnd_data_t), save, allocatable, dimension(:) :: bnd_edges
!
! BND_FACES : Array of type bnd_data_t to store the local index and
! averaging correction for all faces of the partition that
! are located on a partition boundary
! NOTE: The averaging correction is not needed for faces as
! this is only really used to identify the partition
! that is responsible for the face point indices
!
type(bnd_data_t), save, allocatable, dimension(:) :: bnd_faces
!
!
!
! ##########################################################
! ##### MAPPING DERIVED TYPE AND ASSOCIATED ARRAYS #####
! ##########################################################
!
! MAP_T : derived type to store mappings between local and global
! indices for cells, faces, and nodes
!
type :: map_t
! One map_t holds the mapping for a single partition; arrays of map_t
! (e.g. cell_map) hold one entry per partition.
! loc_to_glb : local index to global index mapping array, i.e.
!              loc_to_glb(i) is the global index of local item i
integer, allocatable :: loc_to_glb(:)
end type map_t
!
! CELL_MAP : mappings between local and global cell indices
! NOTE: I believe this mapping array is the only one needed outside
! this module. For example, it is needed for initializing the
! local solution from the global solution that is read in
! from the restart file.
!
type(map_t), public, save, allocatable :: cell_map(:)
!
!
!
! #####################################################################
! ##### EDGE CONNECTIVITY DERIVED TYPES AND ASSOCIATED ARRAYS #####
! #####################################################################
!
! CPU_WITH_EDGE_T : derived type to store the information about
! a partition/processor that contains a given edge
!
type :: cpu_with_edge_t
! All scalar components default to 0.
! partition : grid partition/processor containing this edge
integer :: partition = 0
! loc_edge : index of this edge local to this partition
integer :: loc_edge = 0
! num_cells : number of cells on this partition containing this edge
integer :: num_cells = 0
! edgpts : indices for the edge points on the current edge
!          (unallocated until explicitly allocated)
integer, allocatable :: edgpts(:)
end type cpu_with_edge_t
!
logical(lk), parameter :: use_edgpts = true
!logical(lk), parameter :: use_edgpts = fals
!
! EDGE_T : Derived type to store information for each grid edge
! NOTE: This is different from the derived type
! of the same name in the module geovar
!
type :: edge_t
! is_on_boundary : logical that identifies if this edge is on a
! non-communication boundary face (defaults to false)
logical(lk) :: is_on_boundary = fals
! controlling_partition : partition that is responsible for the index
! of this edge for the CGNS connectivity
integer :: controlling_partition = 0
! order : solution order of the current edge
integer :: order = 0
! cpu : information for each cpu that contains the current edge
!       (one cpu_with_edge_t entry per partition containing the edge)
type(cpu_with_edge_t), allocatable :: cpu(:)
end type edge_t
!
!
!
! #####################################################################
! ##### NODE CONNECTIVITY DERIVED TYPES AND ASSOCIATED ARRAYS #####
! #####################################################################
!
! CPU_WITH_NODE_T : derived type to store the information about
! a partition/processor that contains a given node
!
type :: cpu_with_node_t
! All components default to 0.
! partition : grid partition/processor containing this node
integer :: partition = 0
! loc_node : index of this node local to this partition
integer :: loc_node = 0
! num_cells : number of cells on this partition containing this node
integer :: num_cells = 0
end type cpu_with_node_t
!
! NODE_T : Derived type to store information for each grid node
! NOTE: This is different from the derived type
! of the same name in the module geovar
!
type :: node_t
! is_on_boundary : logical that identifies if this node is on a
! non-communication boundary face (defaults to false)
logical(lk) :: is_on_boundary = fals
! controlling_partition : partition that is responsible for the index
! of this node for the CGNS connectivity
integer :: controlling_partition = 0
! cpu : information for each cpu that contains the current node
!       (one cpu_with_node_t entry per partition containing the node)
type(cpu_with_node_t), allocatable :: cpu(:)
end type node_t
!
!
!
! #####################################################################
! ##### MPI COMMUNICATION DERIVED TYPES AND ASSOCIATED ARRAYS #####
! #####################################################################
!
! EXCH_DATA_T : Derived type containing the information needed to communicate
! data between two processes
! NOTE: adj_cpu, send_tag, and recv_tag all default to -1
! because this represents an invalid value if these
! components are used without explicitly redefining
! them to a valid value
!
type :: exch_data_t
! adj_cpu : process/CPU ID for the communication partner
integer(int_mpi) :: adj_cpu = -1_int_mpi
! send_tag : message tag for matching the MPI communication when sending
integer(int_mpi) :: send_tag = -1_int_mpi
! recv_tag : message tag for matching the MPI communication when receiving
integer(int_mpi) :: recv_tag = -1_int_mpi
! send_datatype : handle for the MPI user-defined datatype to send data
! NOTE: When PBS_ENV is defined the handle has NO default initialization
!       and is undefined until it is explicitly set; otherwise it
!       defaults to MPI_DATATYPE_NULL.
#ifdef PBS_ENV
_MPI_DATA_TYPE_ :: send_datatype
#else
_MPI_DATA_TYPE_ :: send_datatype = MPI_DATATYPE_NULL
#endif
! recv_datatype : handle for the MPI user-defined datatype to receive data
! NOTE: Same default-initialization caveat as send_datatype.
#ifdef PBS_ENV
_MPI_DATA_TYPE_ :: recv_datatype
#else
_MPI_DATA_TYPE_ :: recv_datatype = MPI_DATATYPE_NULL
#endif
end type exch_data_t
!
! EXCH_FUSP : Array containing all the information needed to communicate the
! face flux point solution variables between adjacent processes.
!
type(exch_data_t), save, allocatable :: exch_fusp(:)
!
! EXCH_FDUSP : Array containing all the information needed to communicate
! the face flux point gradients between adjacent processes.
!
type(exch_data_t), save, allocatable :: exch_fdusp(:)
!
! EXCH_NEUSP : Array containing the information for all the communication
! pairs that need to exchange edge and node solutions
!
type(exch_data_t), save, allocatable :: exch_neusp(:)
!
!
! #########################################
! #####    MODULE LOGICAL PARAMETERS    #####
! #########################################
!
!logical(lk), parameter :: use_mpi_pack = fals
!logical(lk), parameter :: use_mpi_pack = true
!!
!logical(lk), parameter :: send_using_large_buffer = fals
!logical(lk), parameter :: send_using_large_buffer = true
!
!logical(lk), parameter :: send_grid_elem_tags = fals
logical(lk), parameter :: send_grid_elem_tags = true
!
logical(lk), parameter :: send_grid_elem_host_cell = fals
!logical(lk), parameter :: send_grid_elem_host_cell = true
!
!
! #########################################
! ##### MODULE GENERIC INTERFACES #####
! #########################################
!
!interface exchange_modes
! module procedure exchange_modes_r4, exchange_modes_r8
!end interface exchange_modes
!
contains
!
!###############################################################################
!
subroutine create_serial_cell_map(lcell)
  !
  ! Build the module array cell_map for the case of a single partition:
  ! cell_map gets exactly one entry and its loc_to_glb component is
  ! allocated with lcell elements filled from intseq(1,lcell).
  ! NOTE(review): intseq is defined outside this module; presumably it
  !               returns the sequence 1,2,...,lcell so that the local and
  !               global cell indices coincide - confirm
  !
  !.. Formal Arguments ..
  ! lcell : number of entries to create in the local-to-global cell map
  integer, intent(in) :: lcell
  !
  !.. Local Scalars ..
  integer :: istat
  !
  !.. Local Parameters ..
  character(len=*), parameter :: pname = "create_serial_cell_map"
  !
continue
  !
  call debug_timer(entering_procedure,pname)
  !
  ! Only one partition exists, so the map array has a single entry
  !
  allocate ( cell_map(1) , stat=istat , errmsg=error_message )
  call alloc_error(pname,"cell_map",1,__LINE__,__FILE__,istat, &
                   error_message,skip_alloc_pause)
  !
  ! Allocate and fill the local-to-global mapping in one statement
  !
  allocate ( cell_map(1)%loc_to_glb(lcell) , &
             source=intseq(1,lcell) , &
             stat=istat , errmsg=error_message )
  call alloc_error(pname,"cell_map(1)%loc_to_glb",1,__LINE__,__FILE__,istat, &
                   error_message)
  !
  call debug_timer(leaving_procedure,pname)
  !
end subroutine create_serial_cell_map
!
!###############################################################################
!
subroutine partition_grid(npart,metis_option_requested,bface,nodes_of_cell, &
                          nodes_of_cell_ptr,xyz_nodes,cell_geom,cell_order)
  !
  ! Partition the global grid into npart pieces and build everything that is
  ! needed to run on the partitioned grid.  The statement order matters
  ! because several steps are collective over MPI communicators:
  !
  !   1. Host roots build the global connectivity and the METIS dual graph.
  !   2. The global root partitions the dual graph with METIS and the result
  !      (epart) is broadcast over host_roots_comm.
  !   3. Host roots build the node/edge arrays (only if continuous_output),
  !      find the partition boundary faces, build the face rotations, and
  !      build the local-to-global maps.
  !   4. All processes call bcast_each_fp_and_map_array.
  !   5. The global root creates the global collective datatypes.
  !   6. All processes call root_localizes_grid.
  !   7. The node/edge and face exchange datatypes are created.
  !
  ! All large work arrays are released as soon as they are no longer needed.
  !
  !.. Use Statements ..
  use ovar, only : continuous_output
  !
  !.. Formal Arguments ..
  ! npart                  : number of partitions to create
  ! metis_option_requested : partitioning method, forwarded unchanged to
  !                          the METIS driver routine
  ! cell_geom, cell_order,
  ! nodes_of_cell, nodes_of_cell_ptr,
  ! bface, xyz_nodes       : grid description; these are intent(inout)
  !                          because find_partition_boundaries and
  !                          root_localizes_grid receive them
  !                          NOTE(review): presumably global on entry and
  !                          localized to this process on return - confirm
  integer,                               intent(in)    :: npart
  integer,                               intent(in)    :: metis_option_requested
  integer,  allocatable, dimension(:),   intent(inout) :: cell_geom
  integer,  allocatable, dimension(:),   intent(inout) :: cell_order
  integer,  allocatable, dimension(:),   intent(inout) :: nodes_of_cell
  integer,  allocatable, dimension(:),   intent(inout) :: nodes_of_cell_ptr
  integer,  allocatable, dimension(:,:), intent(inout) :: bface
  real(wp), allocatable, dimension(:,:), intent(inout) :: xyz_nodes
  !
  !.. Local Scalars ..
  integer :: lcell,ierr
  !
  !.. Local Allocatable Arrays ..
  integer, allocatable :: cells_with_node(:)
  integer, allocatable :: cells_with_node_ptr(:)
  integer, allocatable :: cells_surr_cell(:)
  integer, allocatable :: cells_surr_cell_ptr(:)
  integer, allocatable :: cells_with_edge(:)
  integer, allocatable :: cells_with_edge_ptr(:)
  integer, allocatable :: nodes_on_edge(:,:)
  integer, allocatable :: face_rotation(:,:)
  !
  integer(idx_t), allocatable :: xadj(:)
  integer(idx_t), allocatable :: adjncy(:)
  integer(idx_t), allocatable :: epart(:)
  !
  !.. Local Allocatable Derived-Type Arrays ..
  type(node_t),     allocatable :: node(:)
  type(map_t),      allocatable :: node_map(:)
  type(edge_t),     allocatable :: edge(:)
  type(map_t),      allocatable :: edge_map(:)
  type(face_t),     allocatable :: face(:)
  type(map_t),      allocatable :: face_map(:)
  type(flux_pts_t), allocatable :: flx(:)
  type(fp_t),       allocatable :: fp
  !
  !.. Local Parameters ..
  character(len=*), parameter :: pname = "partition_grid"
  !
continue
  !
  call debug_timer(entering_procedure,pname)
  !
  ! Get the size of the global grid
  !
  nr = size(xyz_nodes,dim=1)
  !
  ! Get the number of interior grid cells
  ! (total cells in nodes_of_cell_ptr minus one cell per boundary face)
  !
  lcell = size(nodes_of_cell_ptr)-1 - size(bface,dim=2)
  !
  if (i_am_host_root) then
    !
    ! Create the global grid connectivity
    !
    call memory_pause("Before: calling get_global_connectivity")
    call get_global_connectivity(cell_geom,cell_order,bface,xyz_nodes, &
                                 nodes_of_cell,nodes_of_cell_ptr, &
                                 cells_with_node,cells_with_node_ptr, &
                                 cells_surr_cell,cells_surr_cell_ptr, &
                                 cells_with_edge,cells_with_edge_ptr, &
                                 nodes_on_edge,face,fp)
    !
    ! Create the dual graph for the global grid not including ghost cells
    !
    call memory_pause("Before: calling create_metis_dual_graph")
    call create_metis_dual_graph(cells_surr_cell,cells_surr_cell_ptr, &
                                 xadj,adjncy)
    !
    ! Deallocate cells_surr_cell and cells_surr_cell_ptr since they
    ! are no longer needed
    !
    if (allocated(cells_surr_cell)) then
      deallocate ( cells_surr_cell , stat=ierr , errmsg=error_message )
      call alloc_error(pname,"cells_surr_cell",2,__LINE__,__FILE__,ierr, &
                       error_message)
    end if
    !
    if (allocated(cells_surr_cell_ptr)) then
      deallocate ( cells_surr_cell_ptr , stat=ierr , errmsg=error_message )
      call alloc_error(pname,"cells_surr_cell_ptr",2,__LINE__,__FILE__,ierr, &
                       error_message)
    end if
    !
    ! Allocate the epart array to store the cell partition numbers
    !
    allocate ( epart(1:lcell) , source=0_idx_t , &
               stat=ierr , errmsg=error_message )
    call alloc_error(pname,"epart",1,__LINE__,__FILE__,ierr,error_message)
    !
    ! Use METIS to partition the grid
    ! NOTE: ONLY USE THE GLOBAL ROOT PROCESS TO PARTITION THE GRID
    !
    if (mypnum == glb_root) then
      !
#ifdef METIS_4
      !
      call memory_pause("Before: calling partition_using_metis_4")
      call partition_using_metis_4(metis_option_requested,npart, &
                                   cell_geom,cell_order, &
                                   xadj,adjncy,epart)
      !
#elif defined(METIS_5)
      !
      call memory_pause("Before: calling partition_using_metis_5")
      call partition_using_metis_5(metis_option_requested,npart, &
                                   xadj,adjncy,epart)
      !
#else
      !
      ! No METIS library was selected at compile time
      !
      write (error_message,1)
      call stop_gfr(abort,pname,__LINE__,__FILE__,error_message)
      !
#endif
      !
    end if
    !
    ! Have the global root process broadcast
    ! the partitioning to all host roots
    !
    call mpi_bcast(epart,int(lcell,kind=int_mpi),mpi_inttyp, &
                   glb_root,host_roots_comm,mpierr)
    !
  end if
  call mpi_barrier(MPI_COMM_WORLD,mpierr)
  !
  ! Deallocate no longer needed METIS arrays
  !
  if (allocated(xadj)) then
    deallocate ( xadj , stat=ierr , errmsg=error_message )
    call alloc_error(pname,"xadj",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  if (allocated(adjncy)) then
    deallocate ( adjncy , stat=ierr , errmsg=error_message )
    call alloc_error(pname,"adjncy",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  if (i_am_host_root) then
    !
    ! Create the node and edge arrays
    ! NOTE: node and edge stay unallocated when continuous_output is false,
    !       which is why create_edge_map below is guarded by allocated(edge)
    !
    if (continuous_output) then
      call memory_pause("Before: calling create_node_and_edge_arrays")
      call create_node_and_edge_arrays(nodes_of_cell,nodes_of_cell_ptr, &
                                       cells_with_node,cells_with_node_ptr, &
                                       cells_with_edge,cells_with_edge_ptr, &
                                       cell_order,epart,bface,node,edge)
    end if
    !
    ! Deallocate nodes_on_edge since it is no longer needed
    !
    if (allocated(nodes_on_edge)) then
      deallocate ( nodes_on_edge , stat=ierr , errmsg=error_message )
      call alloc_error(pname,"nodes_on_edge",2,__LINE__,__FILE__,ierr, &
                       error_message)
    end if
    !
    ! Find the faces that separate two adjacent partitions
    ! and add these faces to boundary faces array
    !
    call memory_pause("Before: calling find_partition_boundaries")
    call find_partition_boundaries(epart,bface,face,cell_geom,cell_order)
    !
    ! Create the flx and face_rotation arrays from fp and the newly
    ! updated face array that now contains communication boundary faces
    !
    call create_face_rotations(face,fp,flx,face_rotation)
    !
    ! Deallocate fp since it is no longer needed
    !
    if (allocated(fp)) then
      deallocate ( fp , stat=ierr ,errmsg=error_message )
      call alloc_error(pname,"fp",2,__LINE__,__FILE__,ierr,error_message)
    end if
    !
    ! Now that we have created boundary conditions for faces
    ! located on partition boundaries, use epart to create
    ! mappings from local to global block orderings.
    ! This also includes creating the reverse mapping arrays
    ! or the maps from global to local block orderings
    !
    call memory_pause("Before: calling create_cell_map")
    call create_cell_map(npart,epart,cell_map)
    !
    call memory_pause("Before: calling create_node_map")
    call create_node_map(npart,epart,node,cells_with_node, &
                         cells_with_node_ptr,node_map)
    !
    call memory_pause("Before: calling create_face_map")
    call create_face_map(npart,epart,face,face_map)
    !
    if (allocated(edge)) then
      call memory_pause("Before: calling create_edge_map")
      call create_edge_map(npart,epart,edge,cells_with_edge, &
                           cells_with_edge_ptr,edge_map)
    end if
    !
    ! Deallocate cells_with_node and cells_with_node_ptr
    ! since they are no longer needed
    !
    if (allocated(cells_with_node)) then
      deallocate ( cells_with_node , stat=ierr , errmsg=error_message )
      call alloc_error(pname,"cells_with_node",2,__LINE__,__FILE__,ierr, &
                       error_message)
    end if
    !
    if (allocated(cells_with_node_ptr)) then
      deallocate ( cells_with_node_ptr , stat=ierr , errmsg=error_message )
      call alloc_error(pname,"cells_with_node_ptr",2,__LINE__,__FILE__,ierr, &
                       error_message)
    end if
    !
#ifdef DEBUG_ON
    !call output_mapping_arrays(node_map,edge_map,face_map,cell_map)
#endif
    !
  end if
  call mpi_barrier(MPI_COMM_WORLD,mpierr)
  !
  ! Deallocate epart, cells_with_edge, and cells_with_edge_ptr
  ! since they are no longer needed
  !
  if (allocated(epart)) then
    deallocate ( epart , stat=ierr , errmsg=error_message )
    call alloc_error(pname,"epart",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  if (allocated(cells_with_edge)) then
    deallocate ( cells_with_edge , stat=ierr , errmsg=error_message )
    call alloc_error(pname,"cells_with_edge",2,__LINE__,__FILE__,ierr, &
                     error_message)
  end if
  !
  if (allocated(cells_with_edge_ptr)) then
    deallocate ( cells_with_edge_ptr , stat=ierr , errmsg=error_message )
    call alloc_error(pname,"cells_with_edge_ptr",2,__LINE__,__FILE__,ierr, &
                     error_message)
  end if
  !
  ! Have the root processor broadcast the flx, face_rotation
  ! and the mapping arrays
  ! NOTE: Each array is broadcast individually. The alternatives that used
  !       MPI_Pack or one large buffer (pack_fp_and_mapping_arrays and
  !       bcast_fp_and_mapping_arrays) are disabled.
  !
  call bcast_each_fp_and_map_array(npart,node_map,edge_map,face_map, &
                                   cell_map,flx,face_rotation)
  !
  ! Before we localize everything, we need to create MPI datatypes
  ! to collectively receive localized data from each processor into
  ! a shared global form.
  ! NOTE: The root processor will be the only processor receiving this global
  !       data so it is the only processor that needs to create these datatypes
  !
  if (mypnum == glb_root) then
    call create_global_collective_datatypes(cell_map,cell_geom,cell_order)
  end if
  !
  ! NOTE: Localization of bc_conflict (localize_bc_conflict) is currently
  !       disabled and is therefore not performed here.
  !
  ! Now that the mappings have been created, we
  ! need to localize the grid on each processor
  !
  call memory_pause("Before: calling root_localizes_grid")
  call root_localizes_grid(nodes_of_cell,nodes_of_cell_ptr,bface, &
                           xyz_nodes,cell_geom,cell_order, &
                           face,node_map,face_map,cell_map)
  !
  ! Deallocate face since it is no longer needed
  ! (none of the remaining calls receive or recreate it)
  !
  if (allocated(face)) then
    deallocate ( face , stat=ierr ,errmsg=error_message )
    call alloc_error(pname,"face",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  ! Broadcast the node and edge arrays and then create the MPI datatypes
  ! for exchanging node and edge data between processes
  !
  call memory_pause("Before: calling bcast_node_and_edge_arrays")
  call bcast_node_and_edge_arrays(node,edge)
  !
  if (continuous_output) then
    call create_node_and_edge_datatypes(node,node_map,edge,edge_map)
  end if
  !
  ! Deallocate the node and edge arrays and the node and edge
  ! mapping arrays since they are no longer needed
  !
  if (allocated(node)) then
    deallocate ( node , stat=ierr ,errmsg=error_message )
    call alloc_error(pname,"node",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  if (allocated(edge)) then
    deallocate ( edge , stat=ierr ,errmsg=error_message )
    call alloc_error(pname,"edge",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  if (allocated(node_map)) then
    deallocate ( node_map , stat=ierr , errmsg=error_message )
    call alloc_error(pname,"node_map",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  if (allocated(edge_map)) then
    deallocate ( edge_map , stat=ierr , errmsg=error_message )
    call alloc_error(pname,"edge_map",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  ! Finally, create MPI datatypes for exchanging face data between processes
  !
  call memory_pause("Before: calling create_face_nonblocking_datatypes")
  call create_face_nonblocking_datatypes(bface,face_map,flx,face_rotation)
  !
  ! Deallocate face_map, flx, and face_rotation
  ! since they are no longer needed
  !
  if (allocated(face_map)) then
    deallocate ( face_map , stat=ierr , errmsg=error_message )
    call alloc_error(pname,"face_map",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  if (allocated(flx)) then
    deallocate ( flx , stat=ierr ,errmsg=error_message )
    call alloc_error(pname,"flx",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  if (allocated(face_rotation)) then
    deallocate ( face_rotation , stat=ierr ,errmsg=error_message )
    call alloc_error(pname,"face_rotation",2,__LINE__,__FILE__,ierr, &
                     error_message)
  end if
  !
  call debug_timer(leaving_procedure,pname)
  !
  ! Format Statements
  !
  1 format(" It seems that the code was compiled without either of the METIS", &
           " pre-processor options being defined. One of these options needs", &
           " to be defined in order to partition the grid and allow the use", &
           " of multiple processors.")
  !
end subroutine partition_grid
!
!###############################################################################
!
subroutine partition_using_metis_4(metis_option_requested,npart, &
                                   cell_geom,cell_order, &
                                   xadj,adjncy,epart)
  !
  ! Partition the dual graph (xadj,adjncy) into npart partitions using the
  ! METIS 4.0 library and return the partition number of each cell in epart.
  !
  ! The requested method is tried first; if the resulting partitioning does
  ! not pass check_quality_of_partitioning, the methods 3, 2, 1, 4 are tried
  ! in that order. The run is aborted through stop_gfr if the requested
  ! method is not one of 1-4 or if no method produces an acceptable
  ! partitioning.
  !
  ! METIS options:
  !   1 : recursive bisection, non-weighted, minimizes the edge cut
  !   2 : k-way partitioning,  non-weighted, minimizes the edge cut
  !   3 : k-way partitioning,  non-weighted, minimizes communication volume
  !   4 : k-way partitioning,  weighted,     minimizes communication volume
  !
  ! NOTE: The whole executable part only exists if the pre-processor
  !       option METIS_4 is defined; otherwise this routine does nothing.
  !
  !.. Use Statements ..
  use iso_c_binding, only : c_float
  !
  !.. Formal Arguments ..
  ! metis_option_requested : METIS method (1-4) to try first
  ! npart                  : number of partitions to create
  ! cell_geom, cell_order  : forwarded to create_grid_weights
  ! xadj, adjncy           : dual graph handed to the METIS routines
  ! epart                  : partition number for each cell; the number of
  !                          cells is taken from size(epart)
  integer,        intent(in)    :: metis_option_requested
  integer,        intent(in)    :: npart
  integer,        intent(in)    :: cell_geom(:)
  integer,        intent(in)    :: cell_order(:)
  integer(idx_t), intent(in)    :: xadj(:)
  integer(idx_t), intent(in)    :: adjncy(:)
  integer(idx_t), intent(inout) :: epart(:)
  !
  !.. Local Scalars ..
  integer :: n,ierr,lcell
  integer :: wgtflag,numflag,vol
  integer :: metis_option
  logical(ldk) :: passes_inspection
  character(len=19) :: method
  character(len=12) :: weight
  character(len=20) :: minmzd
  !
  !.. Local Arrays ..
  integer,       dimension(1:5)     :: metis_option_hierarchy
  integer,       dimension(1:5)     :: options
  real(c_float), dimension(1:npart) :: tpwgts
  !
  !.. Local Allocatable Arrays ..
  integer, allocatable, dimension(:) :: vwgt
  integer, allocatable, dimension(:) :: adjwgt
  integer, allocatable, dimension(:) :: vsize
  !
  !.. Local Parameters ..
  character(len=*), parameter :: pname = "partition_using_metis_4"
  !
continue
#ifdef METIS_4
  !
  call debug_timer(entering_procedure,pname)
  !
  lcell = size(epart)
  !
  ! Create the priority list for METIS options to try
  !
  metis_option_hierarchy = [metis_option_requested,3,2,1,4]
  !
  passes_inspection = .false.
  !
  try_options: do n = 1,size(metis_option_hierarchy)
    !
    ! Get the METIS option to try based on the priority list
    !
    metis_option = metis_option_hierarchy(n)
    !
    ! Create the weights for the vertices of the dual graph
    !
    call create_grid_weights(metis_option,1,lcell,size(adjncy), &
                             vwgt,adjwgt,vsize,wgtflag,cell_geom,cell_order)
    !
    ! Define other METIS variables
    ! NOTE: numflag = 1 selects Fortran-style (1-based) numbering and
    !       options = 0 selects the METIS default options (METIS 4.0 manual)
    !
    numflag = 1
    options(:) = 0
    vol = 0
    !
    ! Partition the grid using one of the METIS routines
    !
    select case (metis_option)
      !
      case (1)
        !
        ! This METIS routine computes non-weighted partitions using a
        ! multilevel recursive bisection while minimizing the edge cut.
        !
        method = "recursive bisection"
        weight = "non-weighted"
        minmzd = " edge cut "
        !
        call metis_partgraphrecursive(lcell,xadj,adjncy,vwgt,adjwgt, &
                                      wgtflag,numflag,npart,options, &
                                      vol,epart)
        !
      case (2)
        !
        ! This METIS routine computes non-weighted partitions using
        ! a multilevel k-way partitioning algorithm while minimizing
        ! the edge cut.
        !
        method = "k-way partitioning "
        weight = "non-weighted"
        minmzd = " edge cut "
        !
        call metis_partgraphkway(lcell,xadj,adjncy,vwgt,adjwgt,wgtflag, &
                                 numflag,npart,options,vol,epart)
        !
      case (3)
        !
        ! This METIS routine computes non-weighted partitions using
        ! a multilevel k-way partitioning algorithm while minimizing
        ! the total communication volume.
        !
        method = "k-way partitioning "
        weight = "non-weighted"
        minmzd = "communication volume"
        !
        call metis_partgraphvkway(lcell,xadj,adjncy,vwgt,vsize,wgtflag, &
                                  numflag,npart,options,vol,epart)
        !
      case (4)
        !
        ! This METIS routine computes weighted partitions using a
        ! multilevel k-way partitioning algorithm while minimizing
        ! the total communication volume.
        !
        method = "k-way partitioning "
        weight = " weighted "
        minmzd = "communication volume"
        !
        ! Compute the prescribed partition weights
        ! NOTE: The array tpwgts needs to be the Fortran real
        !       kind that corresponds to the C float type
        !
        ! For now, set the partition weights equal and make sure they
        ! sum to one. After looking at the METIS source code, this is
        ! actually equivalent to just calling metis_partgraphvkway.
        !
        tpwgts(:) = real(one,c_float)/real(npart,c_float)
        tpwgts(:) = tpwgts(:)/sum(tpwgts)
        !
        call metis_wpartgraphvkway(lcell,xadj,adjncy,vwgt,vsize,wgtflag, &
                                   numflag,npart,tpwgts,options,vol,epart)
        !
      case default
        !
        ! The METIS option requested is invalid!
        !
        write (error_message,103)
        call stop_gfr(abort,pname,__LINE__,__FILE__,error_message)
        !
    end select
    !
    ! Check the partitioning produced by this method for errors.
    ! NOTE: This routine is only called by the global root process, which
    !       broadcasts the accepted partitioning to the other processes
    !       after this routine returns.
    !
    call check_quality_of_partitioning(npart,epart,passes_inspection)
    !
    ! Stop trying other methods once a partitioning passes inspection
    !
    if (passes_inspection) exit try_options
    !
  end do try_options
  !
  if (passes_inspection) then
    !
    ! Report the method that produced the accepted partitioning
    !
    if (mypnum == glb_root) write (iout,101) method,npart,weight,minmzd,vol
    !
  else
    !
    ! None of the methods produced an acceptable partitioning
    !
    write (error_message,102)
    call stop_gfr(abort,pname,__LINE__,__FILE__,error_message)
    !
  end if
  !
  ! Deallocate the weight arrays before we leave
  !
  if (allocated(vwgt)) then
    deallocate ( vwgt , stat=ierr , errmsg=error_message )
    call alloc_error(pname,"vwgt",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  if (allocated(adjwgt)) then
    deallocate ( adjwgt , stat=ierr , errmsg=error_message )
    call alloc_error(pname,"adjwgt",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  if (allocated(vsize)) then
    deallocate ( vsize , stat=ierr , errmsg=error_message )
    call alloc_error(pname,"vsize",2,__LINE__,__FILE__,ierr,error_message)
  end if
  !
  call debug_timer(leaving_procedure,pname)
  !
  ! Format statements
  !
  101 format (/,'-------------------------------------------------------',//, &
              ' Used METIS 4.0 to partition the graph ',/, &
              ' using a multilevel ',a,' algorithm ',/, &
              ' into ',i0,' ',a,' partitions. ',/, &
              ' The minimized ',a,' was ',i0,'. ',//, &
              '-------------------------------------------------------',/)
  102 format (" None of the partitioning methods available in METIS 4.0 were", &
              " able to find a quality partitioning of the grid!")
  103 format (" The METIS partitioning method requested in the input file is", &
              " invalid!")
  !
#endif
end subroutine partition_using_metis_4
!
!###############################################################################
!
subroutine partition_using_metis_5(metis_option_requested,npart, &
xadj,adjncy,epart)
!
!.. Formal Arguments ..