Rheolef  7.1
an efficient C++ finite element environment
mpi_assembly_begin.h
Go to the documentation of this file.
1 #ifndef _RHEO_MPI_ASSEMBLY_BEGIN_H
2 #define _RHEO_MPI_ASSEMBLY_BEGIN_H
23 
24 namespace rheolef {
25 
26 /*F:
27 NAME: mpi_assembly_begin -- for array or matrix (@PACKAGE@ @VERSION@)
28 DESCRIPTION:
29  Start a dense array or a sparse matrix assembly.
30 COMPLEXITY:
31  Assume that stash has indexes in increasing order.
32  cpu complexity : O(stash.size + nproc)
33  memory complexity : O(stash.size + nproc)
34  msg size complexity : O(stash.size + nproc)
35 
36  When assembling a finite element matrix, the stash.size is at boundaries
37  of the mesh partition, and stash.size = O(nrow^alpha), where alpha=(d-1)/d
38  and d=2,3. Using nproc <= O(nrow^alpha) is performant.
39 NOTE:
40  The stash may be sorted by increasing rows and columns.
41 
42  Inspired by PETSc (petsc/src/vec/vec/impls/mpi/pdvec.c), here with
43  a pre-sorted stash, thanks to the std::map data structure.
44 AUTHORS:
45  LMC-IMAG, 38041 Grenoble cedex 9, France
46  | Pierre.Saramito@imag.fr
47 DATE: 23 march 1999
48 END:
49 */
50 
51 //<mpi_assembly_begin:
52 template <
53  class Stash,
54  class Message,
55  class InputIterator>
56 typename Stash::size_type
58  // input:
59  const Stash& stash,
60  InputIterator first_stash_idx, // wrapper in shash
61  InputIterator last_stash_idx,
62  const distributor& ownership,
63  // ouput:
64  Message& receive, // buffer
65  Message& send) // buffer
66 {
67  typedef typename Stash::size_type size_type;
68 
69  mpi::communicator comm = ownership.comm();
70  size_type my_proc = ownership.process();
71  size_type nproc = ownership.n_process();
73 
74  // ----------------------------------------------------------------
75  // 1) count the messages contained in stash by process id
76  // ----------------------------------------------------------------
77  // assume that stash elements are sorted by increasing stash_idx (e.g. stash = stl::map)
78  // cpu complexity = O(stash.size + nproc)
79  // mem complexity = O(stash.size + nproc)
80  std::vector<size_type> msg_size(nproc, 0);
81  std::vector<size_type> msg_mark(nproc, 0);
82  size_type send_nproc = 0;
83  {
84  size_type iproc = 0;
85  size_type i = 0;
86  for (InputIterator iter_idx = first_stash_idx; iter_idx != last_stash_idx; iter_idx++, i++) {
87  for (; iproc < nproc; iproc++) {
88  if (*iter_idx >= ownership[iproc] && *iter_idx < ownership[iproc+1]) {
89  msg_size [iproc]++;
90  if (!msg_mark[iproc]) {
91  msg_mark[iproc] = 1;
92  send_nproc++;
93  }
94  break;
95  }
96  }
97  assert_macro (iproc != nproc, "bad stash data: index "<<*iter_idx<<" out of range [0:"<<ownership[nproc]<<"[");
98  }
99  } // end block
100  // ----------------------------------------------------------------
101  // 2) avoid to send message to my-proc in counting
102  // ----------------------------------------------------------------
103  if (msg_size [my_proc] != 0) {
104  msg_size [my_proc] = 0;
105  msg_mark [my_proc] = 0;
106  send_nproc--;
107  }
108  // ----------------------------------------------------------------
109  // 3) compute number of messages to be send to my_proc
110  // ----------------------------------------------------------------
111  // msg complexity : O(nproc) or O(log(nproc)), depending on reduce implementation
112  std::vector<size_type> work (nproc);
113  mpi::all_reduce (
114  comm,
115  msg_mark.begin().operator->(),
116  nproc,
117  work.begin().operator->(),
118  std::plus<size_type>());
119  size_type receive_nproc = work [my_proc];
120  // ----------------------------------------------------------------
121  // 4) compute messages max size to be send to my_proc
122  // ----------------------------------------------------------------
123  // msg complexity : O(nproc) or O(log(nproc)), depending on reduce implementation
124  mpi::all_reduce (
125  comm,
126  msg_size.begin().operator->(),
127  nproc,
128  work.begin().operator->(),
129  mpi::maximum<size_type>());
130  size_type receive_max_size = work [my_proc];
131  // ----------------------------------------------------------------
132  // 5) post receive: exchange the buffer adresses between processes
133  // ----------------------------------------------------------------
134  // Each message will consist of ordered pairs (global index,value).
135  // since we don't know how long each indiidual message is,
136  // we allocate the largest : receive_nproc*receive_max_size
137  // potentially, this is a lot of wasted space
138  // TODO: how to optimize the receive.data buffer ?
139  // cpu complexity : O(nproc)
140  // mem complexity : O(nproc*(stash.size/nproc)) = O(stash.size), worst case ?
141  // msg complexity : O(nproc)
142  receive.waits.clear();
143  receive.data.resize (receive_nproc*receive_max_size);
144  for (size_t i_receive = 0; i_receive < receive_nproc; i_receive++) {
145  mpi::request i_req = comm.irecv (
146  mpi::any_source,
147  tag,
148  receive.data.begin().operator->() + i_receive*receive_max_size,
149  receive_max_size);
150  receive.waits.push_back (std::make_pair(i_receive, i_req));
151  }
152  // ----------------------------------------------------------------
153  // 6) copy stash in send buffer
154  // ----------------------------------------------------------------
155  // since the stash is sorted by increasing order => simple copy
156  // cpu complexity : O(stash.size)
157  // mem complexity : O(stash.size)
158  send.data.resize (stash.size());
159  copy (stash.begin(), stash.end(), send.data.begin());
160  // ---------------------------------------------------------------------------
161  // 7) do send
162  // ---------------------------------------------------------------------------
163  // cpu complexity : O(nproc)
164  // mem complexity : O(send_nproc) \approx O(nproc), worst case
165  // msg complexity : O(stash.size)
166  send.waits.clear();
167  {
168  size_type i_send = 0;
169  size_type i_start = 0;
170  for (size_type iproc = 0; iproc < nproc; iproc++) {
171  size_type i_msg_size = msg_size[iproc];
172  if (i_msg_size == 0) continue;
173  mpi::request i_req = comm.isend (
174  iproc,
175  tag,
176  send.data.begin().operator->() + i_start,
177  i_msg_size);
178  send.waits.push_back(std::make_pair(i_send,i_req));
179  i_send++;
180  i_start += i_msg_size;
181  }
182  } // end block
183  return receive_max_size;
184 }
185 //>mpi_assembly_begin:
186 } // namespace rheolef
187 #endif //_RHEO_MPI_ASSEMBLY_BEGIN_H
field::size_type size_type
Definition: branch.cc:425
see the distributor page for the full documentation
Definition: distributor.h:62
size_type n_process() const
number of processes
Definition: distributor.h:169
static tag_type get_new_tag()
returns a new tag
Definition: distributor.cc:133
size_type process() const
current process id
Definition: distributor.h:179
const communicator_type & comm() const
Definition: distributor.h:145
size_t size_type
Definition: basis_get.cc:76
#define assert_macro(ok_condition, message)
Definition: dis_macros.h:113
This file is part of Rheolef.
Stash::size_type mpi_assembly_begin(const Stash &stash, InputIterator first_stash_idx, InputIterator last_stash_idx, const distributor &ownership, Message &receive, Message &send)