Skip to content

Commit d102cba

Browse files
authored
Improve CFP by removing rawNewInfos (WebAssembly#7892)
For the purposes of CFP, there's nothing fundamentally different between a set on an exact reference and a value set by allocation. CFP's use of the allocation values without considering all exact sets was therefore an unnecessary complication that restricted CFP's optimizing power. Expand optimizeUsingRefTest to optimize mutable fields, including those that have been set, by using the full available information instead of just the allocation values. Handle copies more judiciously by propagating once to find copied values and then propagating again while taking those copied values into account. This scheme can be extended in the future to precisely handle copies between different fields and types as well. Also optimize siblings better by propagating first down and then up rather than propagating in both directions at once. This avoids unnecessarily propagating set values to siblings.
1 parent 07e02d2 commit d102cba

File tree

3 files changed

+321
-113
lines changed

3 files changed

+321
-113
lines changed

src/passes/ConstantFieldPropagation.cpp

Lines changed: 65 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -91,15 +91,15 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
9191
// subtyping and new infos (information about struct.news).
9292
std::unique_ptr<Pass> create() override {
9393
return std::make_unique<FunctionOptimizer>(
94-
propagatedInfos, subTypes, rawNewInfos, refTest);
94+
propagatedInfos, refTestInfos, subTypes, refTest);
9595
}
9696

9797
FunctionOptimizer(const PCVStructValuesMap& propagatedInfos,
98+
const PCVStructValuesMap& refTestInfos,
9899
const SubTypes& subTypes,
99-
const PCVStructValuesMap& rawNewInfos,
100100
bool refTest)
101-
: propagatedInfos(propagatedInfos), subTypes(subTypes),
102-
rawNewInfos(rawNewInfos), refTest(refTest) {}
101+
: propagatedInfos(propagatedInfos), refTestInfos(refTestInfos),
102+
subTypes(subTypes), refTest(refTest) {}
103103

104104
template<typename T> std::optional<HeapType> getRelevantHeapType(T* ref) {
105105
auto type = ref->type;
@@ -210,7 +210,9 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
210210
// on simply applying a constant. However, we can try to use a ref.test, if
211211
// that is allowed.
212212
if (!info.isConstant()) {
213-
if (refTest) {
213+
// Note that if the reference is exact, we never need to use a ref.test
214+
// because there will not be multiple subtypes to select between.
215+
if (refTest && !ref->type.isExact()) {
214216
optimizeUsingRefTest(curr, ref, index);
215217
}
216218
return;
@@ -233,22 +235,6 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
233235
auto refType = ref->type;
234236
auto refHeapType = refType.getHeapType();
235237

236-
// We only handle immutable fields in this function, as we will be looking
237-
// at |rawNewInfos|. That is, we are trying to see when a type and its
238-
// subtypes have different values (so that we can differentiate between them
239-
// using a ref.test), and those differences are lost in |propagatedInfos|,
240-
// which has propagated to relevant types so that we can do a single check
241-
// to see what value could be there. So we need to use something more
242-
// precise, |rawNewInfos|, which tracks the values written to struct.news,
243-
// where we know the type exactly (unlike with a struct.set). But for that
244-
// reason the field must be immutable, so that it is valid to only look at
245-
// the struct.news. (A more complex flow analysis could do better here, but
246-
// would be far beyond the scope of this pass.)
247-
if (index != StructUtils::DescriptorIndex &&
248-
GCTypeUtils::getField(refType, index)->mutable_ == Mutable) {
249-
return;
250-
}
251-
252238
// We seek two possible constant values. For each we track the constant and
253239
// the types that have that constant. For example, if we have types A, B, C
254240
// and A and B have 42 in their field, and C has 1337, then we'd have this:
@@ -283,13 +269,17 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
283269
return;
284270
}
285271

286-
auto iter = rawNewInfos.find({type, Exact});
287-
if (iter == rawNewInfos.end()) {
288-
// This type has no struct.news, so we can ignore it: it is abstract.
272+
auto iter = refTestInfos.find({type, Exact});
273+
if (iter == refTestInfos.end()) {
274+
// This type has no allocations, so we can ignore it: it is abstract.
289275
return;
290276
}
291277

292278
auto value = iter->second[index];
279+
if (!value.hasNoted()) {
280+
// Also abstract and ignorable.
281+
return;
282+
}
293283
if (!value.isConstant()) {
294284
// The value here is not constant, so give up entirely.
295285
fail = true;
@@ -409,8 +399,8 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
409399

410400
private:
411401
const PCVStructValuesMap& propagatedInfos;
402+
const PCVStructValuesMap& refTestInfos;
412403
const SubTypes& subTypes;
413-
const PCVStructValuesMap& rawNewInfos;
414404
const bool refTest;
415405

416406
bool changed = false;
@@ -492,20 +482,13 @@ struct ConstantFieldPropagation : public Pass {
492482
scanner.runOnModuleCode(runner, module);
493483

494484
// Combine the data from the functions.
495-
PCVStructValuesMap combinedNewInfos, combinedSetInfos;
496-
functionNewInfos.combineInto(combinedNewInfos);
485+
PCVStructValuesMap combinedSetInfos;
486+
functionNewInfos.combineInto(combinedSetInfos);
497487
functionSetInfos.combineInto(combinedSetInfos);
498488
BoolStructValuesMap combinedCopyInfos;
499489
functionCopyInfos.combineInto(combinedCopyInfos);
500490

501-
// Prepare data we will need later.
502-
SubTypes subTypes(*module);
503-
504-
// Copy the unpropagated data before we propagate. We use this in precise
505-
// lookups.
506-
auto rawNewInfos = combinedNewInfos;
507-
508-
// Handle subtyping. |combinedInfo| so far contains data that represents
491+
// Handle subtyping. |combinedSetInfos| so far contains data that represents
509492
// each struct.new and struct.set's operation on the struct type used in
510493
// that instruction. That is, if we do a struct.set to type T, the value was
511494
// noted for type T. But our actual goal is to answer questions about
@@ -532,10 +515,11 @@ struct ConstantFieldPropagation : public Pass {
532515
// efficient, we therefore propagate information about the possible values
533516
// in each field to both subtypes and supertypes.
534517
//
535-
// struct.new on the other hand knows exactly what type is being written to,
536-
// and so given a get of $A and a new of $B, the new is relevant for the get
537-
// iff $A is a subtype of $B, so we only need to propagate in one direction
538-
// there, to supertypes.
518+
// Values written in struct.news are equivalent to values written to exact
519+
// references. In both cases, the propagation to subtypes will not do
520+
// anything because an exact reference has no non-trivial subtypes. This
521+
// works out because a set of a field of an exact reference (or an
522+
// allocation) cannot ever affect the value read out of a subtype's field.
539523
//
540524
// An exception to the above are copies. If a field is copied then even
541525
// struct.new information cannot be assumed to be precise:
@@ -549,36 +533,57 @@ struct ConstantFieldPropagation : public Pass {
549533
// foo(A->f0); // These can contain 20,
550534
// foo(C->f0); // if the copy read from B.
551535
//
552-
// To handle that, copied fields are treated like struct.set ones (by
553-
// copying the struct.new data to struct.set). Note that we must propagate
554-
// copying to subtypes first, as in the example above the struct.new values
555-
// of subtypes must be taken into account (that is, A or a subtype is being
556-
// copied, so we want to do the same thing for B and C as well as A, since
557-
// a copy of A means it could be a copy of B or C).
558-
StructUtils::TypeHierarchyPropagator<StructUtils::CombinableBool>
559-
boolPropagator(subTypes);
560-
boolPropagator.propagateToSubTypes(combinedCopyInfos);
536+
// The handling of copies is explained below.
537+
SubTypes subTypes(*module);
538+
StructUtils::TypeHierarchyPropagator<PossibleConstantValues> propagator(
539+
subTypes);
540+
541+
// Compute the values without accounting for copies.
542+
PCVStructValuesMap noCopySetInfos = combinedSetInfos;
543+
propagator.propagateToSubTypes(noCopySetInfos);
544+
propagator.propagateToSuperTypes(noCopySetInfos);
545+
546+
// Now account for copies. A copy takes a value from any subtype
547+
// of the copy source to any subtype of the copy destination. Since we last
548+
// propagated to supertypes, we know the propagated values increase
549+
// monotonically as you go up the type hierarchy. The propagated value in a
550+
// field therefore overapproximates the values in the corresponding field in
551+
// all the subtypes. So for each copy, we can use the propagated value as
552+
// the copied value. Then we will propagate set values again, this time
553+
// including the copied values. We only need to repeat the propagation once;
554+
// if the second propagation discovers greater values in the copied fields,
555+
// it can only be because those greater values were propagated from a
556+
// supertype. In that case, the greater value has also been propagated to
557+
// all subtypes, so repeating the process will not further change anything.
558+
//
559+
// TODO: Track separate sources and destinations of copies rather than
560+
// special-casing copies to self. This would let propagation discover
561+
// greater copied values from unrelated types or even different field
562+
// indices, so we would have to repeatedly propagate taking into account the
563+
// latest discovered copied values until reaching a fixed point.
561564
for (auto& [type, copied] : combinedCopyInfos) {
562-
for (Index i = 0; i < copied.size(); i++) {
565+
for (Index i = 0; i < copied.size(); ++i) {
563566
if (copied[i]) {
564-
combinedSetInfos[type][i].combine(combinedNewInfos[type][i]);
567+
combinedSetInfos[type][i].combine(noCopySetInfos[type][i]);
565568
}
566569
}
567570
}
568571

569-
StructUtils::TypeHierarchyPropagator<PossibleConstantValues> propagator(
570-
subTypes);
571-
propagator.propagateToSuperTypes(combinedNewInfos);
572-
propagator.propagateToSuperAndSubTypes(combinedSetInfos);
573-
574-
// Combine both sources of information to the final information that gets
575-
// care about.
576-
PCVStructValuesMap combinedInfos = std::move(combinedNewInfos);
577-
combinedSetInfos.combineInto(combinedInfos);
572+
// Propagate the values again, now including values readable by copies.
573+
// RefTest optimization manually checks the values in every subtype to
574+
// make sure they match, so there's no need to propagate values up for that.
575+
// Snapshot the info before propagating up for use in RefTest
576+
// optimization.
577+
PCVStructValuesMap refTestInfos;
578+
propagator.propagateToSubTypes(combinedSetInfos);
579+
if (refTest) {
580+
refTestInfos = combinedSetInfos;
581+
}
582+
propagator.propagateToSuperTypes(combinedSetInfos);
578583

579584
// Optimize.
580585
// TODO: Skip this if we cannot optimize anything
581-
FunctionOptimizer(combinedInfos, subTypes, rawNewInfos, refTest)
586+
FunctionOptimizer(combinedSetInfos, refTestInfos, subTypes, refTest)
582587
.run(runner, module);
583588
}
584589
};

test/lit/passes/cfp-reftest.wast

Lines changed: 163 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -279,18 +279,18 @@
279279
)
280280
)
281281

282-
;; Almost optimizable, but the field is mutable, so we can't.
282+
;; The field is mutable, but we can still optimize.
283283
(module
284284
;; CHECK: (type $struct (sub (struct (field (mut i32)))))
285285
(type $struct (sub (struct (mut i32))))
286-
;; CHECK: (type $1 (func))
287-
288286
;; CHECK: (type $substruct (sub $struct (struct (field (mut i32)) (field f64))))
289287
(type $substruct (sub $struct (struct (mut i32) f64)))
290288

289+
;; CHECK: (type $2 (func))
290+
291291
;; CHECK: (type $3 (func (param (ref null $struct)) (result i32)))
292292

293-
;; CHECK: (func $create (type $1)
293+
;; CHECK: (func $create (type $2)
294294
;; CHECK-NEXT: (drop
295295
;; CHECK-NEXT: (struct.new $struct
296296
;; CHECK-NEXT: (i32.const 10)
@@ -317,6 +317,165 @@
317317
)
318318
)
319319
;; CHECK: (func $get (type $3) (param $struct (ref null $struct)) (result i32)
320+
;; CHECK-NEXT: (select
321+
;; CHECK-NEXT: (i32.const 20)
322+
;; CHECK-NEXT: (i32.const 10)
323+
;; CHECK-NEXT: (ref.test (ref $substruct)
324+
;; CHECK-NEXT: (ref.as_non_null
325+
;; CHECK-NEXT: (local.get $struct)
326+
;; CHECK-NEXT: )
327+
;; CHECK-NEXT: )
328+
;; CHECK-NEXT: )
329+
;; CHECK-NEXT: )
330+
(func $get (param $struct (ref null $struct)) (result i32)
331+
(struct.get $struct 0
332+
(local.get $struct)
333+
)
334+
)
335+
)
336+
337+
;; No-op sets do not inhibit optimization.
338+
(module
339+
;; CHECK: (type $struct (sub (struct (field (mut i32)))))
340+
(type $struct (sub (struct (mut i32))))
341+
;; CHECK: (type $substruct (sub $struct (struct (field (mut i32)) (field f64))))
342+
(type $substruct (sub $struct (struct (mut i32) f64)))
343+
344+
;; CHECK: (type $2 (func))
345+
346+
;; CHECK: (type $3 (func (param (ref null (exact $struct)) (ref null $substruct))))
347+
348+
;; CHECK: (type $4 (func (param (ref null $struct)) (result i32)))
349+
350+
;; CHECK: (func $create (type $2)
351+
;; CHECK-NEXT: (drop
352+
;; CHECK-NEXT: (struct.new $struct
353+
;; CHECK-NEXT: (i32.const 10)
354+
;; CHECK-NEXT: )
355+
;; CHECK-NEXT: )
356+
;; CHECK-NEXT: (drop
357+
;; CHECK-NEXT: (struct.new $substruct
358+
;; CHECK-NEXT: (i32.const 20)
359+
;; CHECK-NEXT: (f64.const 3.14159)
360+
;; CHECK-NEXT: )
361+
;; CHECK-NEXT: )
362+
;; CHECK-NEXT: )
363+
(func $create
364+
(drop
365+
(struct.new $struct
366+
(i32.const 10)
367+
)
368+
)
369+
(drop
370+
(struct.new $substruct
371+
(i32.const 20)
372+
(f64.const 3.14159)
373+
)
374+
)
375+
)
376+
377+
;; CHECK: (func $sets (type $3) (param $struct-exact (ref null (exact $struct))) (param $substruct (ref null $substruct))
378+
;; CHECK-NEXT: (struct.set $struct 0
379+
;; CHECK-NEXT: (local.get $struct-exact)
380+
;; CHECK-NEXT: (i32.const 10)
381+
;; CHECK-NEXT: )
382+
;; CHECK-NEXT: (struct.set $substruct 0
383+
;; CHECK-NEXT: (local.get $substruct)
384+
;; CHECK-NEXT: (i32.const 20)
385+
;; CHECK-NEXT: )
386+
;; CHECK-NEXT: )
387+
(func $sets (param $struct-exact (ref null (exact $struct))) (param $substruct (ref null $substruct))
388+
(struct.set $struct 0
389+
(local.get $struct-exact)
390+
(i32.const 10)
391+
)
392+
(struct.set $substruct 0
393+
(local.get $substruct)
394+
(i32.const 20)
395+
)
396+
)
397+
398+
;; CHECK: (func $get (type $4) (param $struct (ref null $struct)) (result i32)
399+
;; CHECK-NEXT: (select
400+
;; CHECK-NEXT: (i32.const 20)
401+
;; CHECK-NEXT: (i32.const 10)
402+
;; CHECK-NEXT: (ref.test (ref $substruct)
403+
;; CHECK-NEXT: (ref.as_non_null
404+
;; CHECK-NEXT: (local.get $struct)
405+
;; CHECK-NEXT: )
406+
;; CHECK-NEXT: )
407+
;; CHECK-NEXT: )
408+
;; CHECK-NEXT: )
409+
(func $get (param $struct (ref null $struct)) (result i32)
410+
(struct.get $struct 0
411+
(local.get $struct)
412+
)
413+
)
414+
)
415+
416+
;; Same as above, except now the set to $struct is inexact so we cannot
417+
;; optimize.
418+
(module
419+
;; CHECK: (type $struct (sub (struct (field (mut i32)))))
420+
(type $struct (sub (struct (mut i32))))
421+
;; CHECK: (type $substruct (sub $struct (struct (field (mut i32)) (field f64))))
422+
(type $substruct (sub $struct (struct (mut i32) f64)))
423+
424+
;; CHECK: (type $2 (func))
425+
426+
;; CHECK: (type $3 (func (param (ref null $struct) (ref null $substruct))))
427+
428+
;; CHECK: (type $4 (func (param (ref null $struct)) (result i32)))
429+
430+
;; CHECK: (func $create (type $2)
431+
;; CHECK-NEXT: (drop
432+
;; CHECK-NEXT: (struct.new $struct
433+
;; CHECK-NEXT: (i32.const 10)
434+
;; CHECK-NEXT: )
435+
;; CHECK-NEXT: )
436+
;; CHECK-NEXT: (drop
437+
;; CHECK-NEXT: (struct.new $substruct
438+
;; CHECK-NEXT: (i32.const 20)
439+
;; CHECK-NEXT: (f64.const 3.14159)
440+
;; CHECK-NEXT: )
441+
;; CHECK-NEXT: )
442+
;; CHECK-NEXT: )
443+
(func $create
444+
(drop
445+
(struct.new $struct
446+
(i32.const 10)
447+
)
448+
)
449+
(drop
450+
(struct.new $substruct
451+
(i32.const 20)
452+
(f64.const 3.14159)
453+
)
454+
)
455+
)
456+
457+
;; CHECK: (func $sets (type $3) (param $struct (ref null $struct)) (param $substruct (ref null $substruct))
458+
;; CHECK-NEXT: (struct.set $struct 0
459+
;; CHECK-NEXT: (local.get $struct)
460+
;; CHECK-NEXT: (i32.const 10)
461+
;; CHECK-NEXT: )
462+
;; CHECK-NEXT: (struct.set $substruct 0
463+
;; CHECK-NEXT: (local.get $substruct)
464+
;; CHECK-NEXT: (i32.const 20)
465+
;; CHECK-NEXT: )
466+
;; CHECK-NEXT: )
467+
(func $sets (param $struct (ref null $struct)) (param $substruct (ref null $substruct))
468+
(struct.set $struct 0
469+
(local.get $struct)
470+
(i32.const 10)
471+
)
472+
(struct.set $substruct 0
473+
(local.get $substruct)
474+
(i32.const 20)
475+
)
476+
)
477+
478+
;; CHECK: (func $get (type $4) (param $struct (ref null $struct)) (result i32)
320479
;; CHECK-NEXT: (struct.get $struct 0
321480
;; CHECK-NEXT: (local.get $struct)
322481
;; CHECK-NEXT: )

0 commit comments

Comments
 (0)