Skip to content

Commit 903fdab

Browse files
mloubout authored and FabioLuporini committed
compiler: improve lifting processing to avoid aliases misplacement
1 parent abc1879 commit 903fdab

7 files changed

Lines changed: 51 additions & 29 deletions

File tree

conftest.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -479,3 +479,9 @@ def check_array(array, exp_halo, exp_shape, rotate=False):
479479

480480
assert tuple(array.halo) == exp_halo
481481
assert tuple(shape) == tuple(exp_shape)
482+
483+
484+
# Main body in Operator IET, depending on ISA
485+
def body0(op):
486+
bidx = 0 if 'sse' not in configuration['platform'].known_isas else 1
487+
return op.body.body[bidx]

devito/passes/clusters/aliases.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,8 @@ def _aliases_from_clusters(self, cgroup, exclude, meta):
127127
for mapper in self._generate(cgroup, exclude):
128128
# Clusters -> AliasList
129129
found = collect(mapper.extracted, meta.ispace, self.opt_minstorage)
130+
if not found:
131+
continue
130132
exprs, aliases = self._choose(found, cgroup, mapper)
131133

132134
# AliasList -> Schedule
@@ -147,7 +149,7 @@ def _aliases_from_clusters(self, cgroup, exclude, meta):
147149
# Schedule -> [Clusters]_k
148150
processed, subs = lower_schedule(schedule, meta, self.sregistry,
149151
self.opt_ftemps, self.opt_min_dtype,
150-
self.opt_minmem)
152+
self.opt_minmem, nclusters=len(cgroup))
151153

152154
# [Clusters]_k -> [Clusters]_k (optimization)
153155
if self.opt_multisubdomain:
@@ -272,7 +274,6 @@ def _do_generate(self, exprs, exclude, cbk_search, cbk_compose=None):
272274
free_symbols = i.free_symbols
273275
if {a.function for a in free_symbols} & exclude:
274276
continue
275-
276277
mapper.add(i, make, terms)
277278

278279
return mapper
@@ -853,7 +854,7 @@ def optimize_schedule_rotations(schedule, sregistry):
853854

854855

855856
def lower_schedule(schedule, meta, sregistry, opt_ftemps, opt_min_dtype,
856-
opt_minmem):
857+
opt_minmem, nclusters=1):
857858
"""
858859
Turn a Schedule into a sequence of Clusters.
859860
"""
@@ -929,20 +930,21 @@ def lower_schedule(schedule, meta, sregistry, opt_ftemps, opt_min_dtype,
929930
# Degenerate case: scalar expression
930931
assert writeto.size == 0
931932

932-
guards = None
933933
is_cond = any(isinstance(d, (SubsamplingFactor, ConditionalDimension))
934934
for d in pivot.free_symbols)
935-
if meta.guards and is_cond:
935+
if meta.guards and is_cond and nclusters > 1:
936936
# Scalar alias that depends on a guard, unsafe to lift out of the guard
937937
# Do not alias
938938
expression = None
939939
callback = lambda idx: uxreplace(pivot, subs) # noqa: B023
940940
else:
941941
dtype = sympy_dtype(pivot, base=meta.dtype, smin=opt_min_dtype)
942-
obj = Temp(name=name, dtype=dtype)
942+
obj = Temp(name=name, dtype=dtype, is_const=True)
943943
expression = Eq(obj, uxreplace(pivot, subs))
944944

945945
callback = lambda idx: obj # noqa: B023
946+
# Only keep the guard if there is no cross-cluster reuse of the scalar
947+
guards = meta.guards if nclusters == 1 else None
946948

947949
# Create the substitution rules for the aliasing expressions
948950
subs.update({

devito/passes/clusters/misc.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,14 @@ def callback(self, clusters, prefix):
102102
# unless the guard is for an outer dimension
103103
guards = {} if c.is_scalar and not (prefix[:-1] and c.guards) else c.guards
104104

105-
lifted.append(c.rebuild(ispace=ispace, properties=properties, guards=guards))
105+
_lifted = c.rebuild(ispace=ispace, properties=properties, guards=guards)
106+
if guards and clusters[max(n-1, 0)].guards != guards and _lifted.is_scalar:
107+
# Heuristic: if the lifted Cluster has different guards than the
108+
# previous one, then we are likely to end up with a separate
109+
# Cluster, hence give up on lifting
110+
processed.append(_lifted)
111+
else:
112+
lifted.append(_lifted)
106113

107114
return lifted + processed
108115

examples/performance/01_gpu.ipynb

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -142,8 +142,13 @@
142142
"name": "stderr",
143143
"output_type": "stream",
144144
"text": [
145-
"NUMA domain count autodetection failed, assuming 1\n",
146-
"Operator `Kernel` ran in 0.01 s\n",
145+
"NUMA domain count autodetection failed, assuming 1\n"
146+
]
147+
},
148+
{
149+
"name": "stderr",
150+
"output_type": "stream",
151+
"text": [
147152
"Operator `Kernel` ran in 0.01 s\n"
148153
]
149154
}
@@ -292,9 +297,9 @@
292297
" const int x_stride0 = x_fsz0*y_fsz0;\n",
293298
" const int y_stride0 = y_fsz0;\n",
294299
"\n",
295-
" float r0 = 1.0F/dt;\n",
296-
" float r1 = 1.0F/(h_x*h_x);\n",
297-
" float r2 = 1.0F/(h_y*h_y);\n",
300+
" const float r0 = 1.0F/dt;\n",
301+
" const float r1 = 1.0F/(h_x*h_x);\n",
302+
" const float r2 = 1.0F/(h_y*h_y);\n",
298303
"\n",
299304
" for (int time = time_m; time <= time_M; time += 1)\n",
300305
" {\n",
@@ -340,7 +345,7 @@
340345
"name": "python",
341346
"nbconvert_exporter": "python",
342347
"pygments_lexer": "ipython3",
343-
"version": "3.13.5"
348+
"version": "3.13.11"
344349
}
345350
},
346351
"nbformat": 4,

tests/test_dse.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,8 @@
55
from sympy import Mul # noqa
66

77
from conftest import ( # noqa
8-
_R, EVAL, assert_blocking, assert_structure, check_array, get_arrays, get_params,
9-
skipif
8+
_R, EVAL, assert_blocking, assert_structure, body0, check_array, get_arrays,
9+
get_params, skipif
1010
)
1111
from devito import ( # noqa
1212
NODE, Abs, ConditionalDimension, Constant, DefaultDimension, Derivative, Dimension,
@@ -348,8 +348,8 @@ def test_scalar_cond(self):
348348
trees = retrieve_iteration_tree(op)
349349

350350
assert len(trees) == 3
351-
assert_structure(op, ['t', 't,x,y', 't,x,y'], 'txyxy')
352-
assert trees[0].dimensions == [time]
351+
assert_structure(op, ['t,x,y', 't', 't,x,y'], 'txyxy')
352+
assert trees[1].dimensions == [time]
353353

354354

355355
class TestAliases:
@@ -2552,6 +2552,7 @@ def test_invariants_with_conditional(self):
25522552
eqn = Eq(u, u - (cos(time_sub * factor * f) * uf))
25532553

25542554
op = Operator(eqn, opt='advanced')
2555+
25552556
assert_structure(op, ['t', 't,fd', 't,fd,x,y'], 't,fd,x,y')
25562557
# Make sure it compiles
25572558
_ = op.cfunction
@@ -2700,10 +2701,12 @@ def test_split_cond(self):
27002701
eq2 = Eq(u.forward, u.forward + cos(time), implicit_dims=ct)
27012702

27022703
op = Operator([eq0, eq1, eq2])
2704+
op(time=5)
2705+
27032706
cond = FindNodes(Conditional).visit(op)
27042707
assert len(cond) == 3
27052708
# The alias should have been lifted out of the condition
2706-
assert 'float r0 = cos(time);' in str(op.body.body[0])
2709+
assert 'float r0 = cos(time);' in str(body0(op))
27072710
scalars = [i for i in FindSymbols().visit(op) if isinstance(i, Temp)]
27082711
assert len(scalars) == 1
27092712

@@ -2721,10 +2724,12 @@ def test_split_cond_multi_alias(self):
27212724
eq2 = Eq(u.forward, u.forward + cos(time) - sin(time), implicit_dims=ct)
27222725

27232726
op = Operator([eq0, eq1, eq2])
2727+
op(time=5)
2728+
27242729
cond = FindNodes(Conditional).visit(op)
27252730
assert len(cond) == 3
27262731
# The alias should have been lifted out of the condition
2727-
assert 'float r3 = cos(time);' in str(op.body.body[0])
2732+
assert 'float r3 = cos(time);' in str(body0(op))
27282733
scalars = [i for i in FindSymbols().visit(op) if isinstance(i, Temp)]
27292734
assert len(scalars) == 5
27302735

@@ -2743,6 +2748,7 @@ def test_multi_cond_no_split(self):
27432748
eq2 = Eq(u.forward, u.forward - sin(time), implicit_dims=ct)
27442749

27452750
op = Operator([eq0, eq1, eq2])
2751+
op(time=5)
27462752

27472753
assert_structure(
27482754
op,
@@ -2751,7 +2757,7 @@ def test_multi_cond_no_split(self):
27512757
)
27522758

27532759
scalars = [i for i in FindSymbols().visit(op) if isinstance(i, Temp)]
2754-
assert len(scalars) == 4
2760+
assert len(scalars) == 3
27552761

27562762
def test_alias_with_conditional(self):
27572763
grid = Grid((11, 11))
@@ -2767,6 +2773,8 @@ def test_alias_with_conditional(self):
27672773
eq2 = Eq(u.forward, u.forward + cos(ct), implicit_dims=ct)
27682774

27692775
op = Operator([eq0, eq1, eq2])
2776+
op(time=5)
2777+
27702778
cond = FindNodes(Conditional).visit(op)
27712779
assert len(cond) == 3
27722780

tests/test_mpi.py

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import pytest
55
from test_dse import TestTTI
66

7-
from conftest import _R, assert_blocking, assert_structure
7+
from conftest import _R, assert_blocking, assert_structure, body0
88
from devito import (
99
NODE, Buffer, ConditionalDimension, Constant, CustomDimension, DefaultDimension,
1010
Dimension, Eq, Function, Grid, Inc, Ne, Operator, PrecomputedSparseFunction,
@@ -24,12 +24,6 @@
2424
from examples.seismic.acoustic import acoustic_setup
2525

2626

27-
# Main body in Operator IET, depending on ISA
28-
def body0(op):
29-
bidx = 0 if 'sse' not in configuration['platform'].known_isas else 1
30-
return op.body.body[bidx]
31-
32-
3327
class TestDistributor:
3428

3529
@pytest.mark.parallel(mode=[2, 4])

tests/test_symbolics.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,8 +58,8 @@ def test_func_of_indices():
5858

5959

6060
@pytest.mark.parametrize('dtype,expected', [
61-
(np.float32, "float r0 = 1.0F/h_x;"),
62-
(np.float64, "double r0 = 1.0/h_x;")
61+
(np.float32, "const float r0 = 1.0F/h_x;"),
62+
(np.float64, "const double r0 = 1.0/h_x;")
6363
])
6464
def test_floatification_issue_1627(dtype, expected):
6565
"""

0 commit comments

Comments
 (0)