Skip to content

Commit e2af4c7

Browse files
authored
Merge pull request #666 from OP2/connorjward/add-nbytes
Connorjward/add nbytes
2 parents dc5f3bc + 89c9dec commit e2af4c7

2 files changed

Lines changed: 22 additions & 0 deletions

File tree

pyop2/configuration.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ class Configuration(dict):
118118
("PYOP2_LDFLAGS", str, ""),
119119
"simd_width":
120120
("PYOP2_SIMD_WIDTH", int, 1),
121+
"extra_info":
122+
("PYOP2_EXTRA_INFO", bool, False),
121123
"vectorization_strategy":
122124
("PYOP2_VECT_STRATEGY", str, "cross-element"),
123125
"alignment":

pyop2/parloop.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ def _compute(self, part):
187187
188188
:arg part: The :class:`SetPartition` to compute over.
189189
"""
190+
if configuration["extra_info"]:
191+
nbytes = self.comm.allreduce(self.nbytes)
192+
if self.comm.Get_rank() == 0:
193+
print("{0}_BYTES= {1}".format(self.global_kernel.name, nbytes))
190194
with self._compute_event():
191195
PETSc.Log.logFlops(part.size*self.num_flops)
192196
self.global_kernel(self.comm, part.offset, part.offset+part.size, *self.arglist)
@@ -195,6 +199,22 @@ def _compute(self, part):
195199
def num_flops(self):
196200
return self.global_kernel.num_flops(self.iterset)
197201

202+
@cached_property
def nbytes(self):
    """Return the byte count accumulated over this parloop's arguments.

    The total is built from two contributions:

    * ``arg.data.nbytes`` for every entry of ``self.arguments`` (added
      unconditionally, once per argument), and
    * ``map_.values.nbytes`` for every non-``None`` map in ``arg.maps``,
      added once for each entry of ``map_._kernel_args_`` that has not
      been encountered earlier in the traversal.

    The result is cached on the instance after the first access.

    :returns: The accumulated number of bytes.
    """
    # Kernel arguments of maps that have already contributed to the total;
    # shared across all arguments so a repeated entry is not counted again.
    counted_kernel_args = set()
    total = 0
    for argument in self.arguments:
        total += argument.data.nbytes
        for mapping in argument.maps:
            # Arguments may carry ``None`` in place of a map; nothing to add.
            if mapping is not None:
                for kernel_arg in mapping._kernel_args_:
                    if kernel_arg not in counted_kernel_args:
                        total += mapping.values.nbytes
                        counted_kernel_args.add(kernel_arg)
    return total
217+
198218
@mpi.collective
199219
def compute(self):
200220
# Parloop.compute is an alias for Parloop.__call__

0 commit comments

Comments
 (0)