Flutter Impeller
compute_unittests.cc
Go to the documentation of this file.
1 // Copyright 2013 The Flutter Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "flutter/fml/synchronization/waitable_event.h"
6 #include "flutter/testing/testing.h"
7 #include "gmock/gmock.h"
9 #include "impeller/fixtures/sample.comp.h"
10 #include "impeller/fixtures/stage1.comp.h"
11 #include "impeller/fixtures/stage2.comp.h"
16 #include "impeller/renderer/prefix_sum_test.comp.h"
17 #include "impeller/renderer/threadgroup_sizing_test.comp.h"
18 
19 namespace impeller {
20 namespace testing {
23 
24 TEST_P(ComputeTest, CapabilitiesReportSupport) {
25  auto context = GetContext();
26  ASSERT_TRUE(context);
27  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
28 }
29 
30 TEST_P(ComputeTest, CanCreateComputePass) {
31  using CS = SampleComputeShader;
32  auto context = GetContext();
33  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator(),
34  context->GetIdleWaiter());
35  ASSERT_TRUE(context);
36  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
37 
38  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
39  auto pipeline_desc =
40  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
41  ASSERT_TRUE(pipeline_desc.has_value());
42  auto compute_pipeline =
43  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
44  ASSERT_TRUE(compute_pipeline);
45 
46  auto cmd_buffer = context->CreateCommandBuffer();
47  auto pass = cmd_buffer->CreateComputePass();
48  ASSERT_TRUE(pass && pass->IsValid());
49 
50  static constexpr size_t kCount = 5;
51 
52  pass->SetPipeline(compute_pipeline);
53 
54  CS::Info info{.count = kCount};
55  CS::Input0<kCount> input_0;
56  CS::Input1<kCount> input_1;
57  for (size_t i = 0; i < kCount; i++) {
58  input_0.elements[i] = Vector4(2.0 + i, 3.0 + i, 4.0 + i, 5.0 * i);
59  input_1.elements[i] = Vector4(6.0, 7.0, 8.0, 9.0);
60  }
61 
62  input_0.fixed_array[1] = IPoint32(2, 2);
63  input_1.fixed_array[0] = UintPoint32(3, 3);
64  input_0.some_int = 5;
65  input_1.some_struct = CS::SomeStruct{.vf = Point(3, 4), .i = 42};
66 
67  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::Output<kCount>>(
68  context, "Output Buffer");
69 
70  CS::BindInfo(*pass, host_buffer->EmplaceUniform(info));
71  CS::BindInput0(*pass, host_buffer->EmplaceStorageBuffer(input_0));
72  CS::BindInput1(*pass, host_buffer->EmplaceStorageBuffer(input_1));
73  CS::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer));
74 
75  ASSERT_TRUE(pass->Compute(ISize(kCount, 1)).ok());
76  ASSERT_TRUE(pass->EncodeCommands());
77 
78  fml::AutoResetWaitableEvent latch;
79  ASSERT_TRUE(
80  context->GetCommandQueue()
81  ->Submit(
82  {cmd_buffer},
83  [&latch, output_buffer, &input_0,
84  &input_1](CommandBuffer::Status status) {
85  EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
86 
87  auto view = DeviceBuffer::AsBufferView(output_buffer);
88  EXPECT_EQ(view.GetRange().length, sizeof(CS::Output<kCount>));
89 
90  CS::Output<kCount>* output =
91  reinterpret_cast<CS::Output<kCount>*>(
92  output_buffer->OnGetContents());
93  EXPECT_TRUE(output);
94  for (size_t i = 0; i < kCount; i++) {
95  Vector4 vector = output->elements[i];
96  Vector4 computed = input_0.elements[i] * input_1.elements[i];
97  EXPECT_EQ(vector,
98  Vector4(computed.x + 2 + input_1.some_struct.i,
99  computed.y + 3 + input_1.some_struct.vf.x,
100  computed.z + 5 + input_1.some_struct.vf.y,
101  computed.w));
102  }
103  latch.Signal();
104  })
105  .ok());
106 
107  latch.Wait();
108 }
109 
110 TEST_P(ComputeTest, CanComputePrefixSum) {
111  using CS = PrefixSumTestComputeShader;
112  auto context = GetContext();
113  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator(),
114  context->GetIdleWaiter());
115  ASSERT_TRUE(context);
116  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
117 
118  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
119  auto pipeline_desc =
120  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
121  ASSERT_TRUE(pipeline_desc.has_value());
122  auto compute_pipeline =
123  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
124  ASSERT_TRUE(compute_pipeline);
125 
126  auto cmd_buffer = context->CreateCommandBuffer();
127  auto pass = cmd_buffer->CreateComputePass();
128  ASSERT_TRUE(pass && pass->IsValid());
129 
130  static constexpr size_t kCount = 5;
131 
132  pass->SetPipeline(compute_pipeline);
133 
134  CS::InputData<kCount> input_data;
135  input_data.count = kCount;
136  for (size_t i = 0; i < kCount; i++) {
137  input_data.data[i] = 1 + i;
138  }
139 
140  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
141  context, "Output Buffer");
142 
143  CS::BindInputData(*pass, host_buffer->EmplaceStorageBuffer(input_data));
144  CS::BindOutputData(*pass, DeviceBuffer::AsBufferView(output_buffer));
145 
146  ASSERT_TRUE(pass->Compute(ISize(kCount, 1)).ok());
147  ASSERT_TRUE(pass->EncodeCommands());
148 
149  fml::AutoResetWaitableEvent latch;
150  ASSERT_TRUE(
151  context->GetCommandQueue()
152  ->Submit({cmd_buffer},
153  [&latch, output_buffer](CommandBuffer::Status status) {
154  EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
155 
156  auto view = DeviceBuffer::AsBufferView(output_buffer);
157  EXPECT_EQ(view.GetRange().length,
158  sizeof(CS::OutputData<kCount>));
159 
160  CS::OutputData<kCount>* output =
161  reinterpret_cast<CS::OutputData<kCount>*>(
162  output_buffer->OnGetContents());
163  EXPECT_TRUE(output);
164 
165  constexpr uint32_t expected[kCount] = {1, 3, 6, 10, 15};
166  for (size_t i = 0; i < kCount; i++) {
167  auto computed_sum = output->data[i];
168  EXPECT_EQ(computed_sum, expected[i]);
169  }
170  latch.Signal();
171  })
172  .ok());
173 
174  latch.Wait();
175 }
176 
177 TEST_P(ComputeTest, 1DThreadgroupSizingIsCorrect) {
178  using CS = ThreadgroupSizingTestComputeShader;
179  auto context = GetContext();
180  ASSERT_TRUE(context);
181  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
182 
183  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
184  auto pipeline_desc =
185  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
186  ASSERT_TRUE(pipeline_desc.has_value());
187  auto compute_pipeline =
188  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
189  ASSERT_TRUE(compute_pipeline);
190 
191  auto cmd_buffer = context->CreateCommandBuffer();
192  auto pass = cmd_buffer->CreateComputePass();
193  ASSERT_TRUE(pass && pass->IsValid());
194 
195  static constexpr size_t kCount = 2048;
196 
197  pass->SetPipeline(compute_pipeline);
198 
199  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
200  context, "Output Buffer");
201 
202  CS::BindOutputData(*pass, DeviceBuffer::AsBufferView(output_buffer));
203 
204  ASSERT_TRUE(pass->Compute(ISize(kCount, 1)).ok());
205  ASSERT_TRUE(pass->EncodeCommands());
206 
207  fml::AutoResetWaitableEvent latch;
208  ASSERT_TRUE(
209  context->GetCommandQueue()
210  ->Submit({cmd_buffer},
211  [&latch, output_buffer](CommandBuffer::Status status) {
212  EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
213 
214  auto view = DeviceBuffer::AsBufferView(output_buffer);
215  EXPECT_EQ(view.GetRange().length,
216  sizeof(CS::OutputData<kCount>));
217 
218  CS::OutputData<kCount>* output =
219  reinterpret_cast<CS::OutputData<kCount>*>(
220  output_buffer->OnGetContents());
221  EXPECT_TRUE(output);
222  EXPECT_EQ(output->data[kCount - 1], kCount - 1);
223  latch.Signal();
224  })
225  .ok());
226 
227  latch.Wait();
228 }
229 
230 TEST_P(ComputeTest, CanComputePrefixSumLargeInteractive) {
231  using CS = PrefixSumTestComputeShader;
232 
233  auto context = GetContext();
234  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator(),
235  context->GetIdleWaiter());
236 
237  ASSERT_TRUE(context);
238  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
239 
240  auto callback = [&](RenderPass& render_pass) -> bool {
241  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
242  auto pipeline_desc =
243  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
244  auto compute_pipeline =
245  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
246 
247  auto cmd_buffer = context->CreateCommandBuffer();
248  auto pass = cmd_buffer->CreateComputePass();
249 
250  static constexpr size_t kCount = 1023;
251 
252  pass->SetPipeline(compute_pipeline);
253 
254  CS::InputData<kCount> input_data;
255  input_data.count = kCount;
256  for (size_t i = 0; i < kCount; i++) {
257  input_data.data[i] = 1 + i;
258  }
259 
260  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
261  context, "Output Buffer");
262 
263  CS::BindInputData(*pass, host_buffer->EmplaceStorageBuffer(input_data));
264  CS::BindOutputData(*pass, DeviceBuffer::AsBufferView(output_buffer));
265 
266  pass->Compute(ISize(kCount, 1));
267  pass->EncodeCommands();
268  host_buffer->Reset();
269  return context->GetCommandQueue()->Submit({cmd_buffer}).ok();
270  };
271  ASSERT_TRUE(OpenPlaygroundHere(callback));
272 }
273 
274 TEST_P(ComputeTest, MultiStageInputAndOutput) {
275  using CS1 = Stage1ComputeShader;
276  using Stage1PipelineBuilder = ComputePipelineBuilder<CS1>;
277  using CS2 = Stage2ComputeShader;
278  using Stage2PipelineBuilder = ComputePipelineBuilder<CS2>;
279 
280  auto context = GetContext();
281  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator(),
282  context->GetIdleWaiter());
283  ASSERT_TRUE(context);
284  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
285 
286  auto pipeline_desc_1 =
287  Stage1PipelineBuilder::MakeDefaultPipelineDescriptor(*context);
288  ASSERT_TRUE(pipeline_desc_1.has_value());
289  auto compute_pipeline_1 =
290  context->GetPipelineLibrary()->GetPipeline(pipeline_desc_1).Get();
291  ASSERT_TRUE(compute_pipeline_1);
292 
293  auto pipeline_desc_2 =
294  Stage2PipelineBuilder::MakeDefaultPipelineDescriptor(*context);
295  ASSERT_TRUE(pipeline_desc_2.has_value());
296  auto compute_pipeline_2 =
297  context->GetPipelineLibrary()->GetPipeline(pipeline_desc_2).Get();
298  ASSERT_TRUE(compute_pipeline_2);
299 
300  auto cmd_buffer = context->CreateCommandBuffer();
301  auto pass = cmd_buffer->CreateComputePass();
302  ASSERT_TRUE(pass && pass->IsValid());
303 
304  static constexpr size_t kCount1 = 5;
305  static constexpr size_t kCount2 = kCount1 * 2;
306 
307  CS1::Input<kCount1> input_1;
308  input_1.count = kCount1;
309  for (size_t i = 0; i < kCount1; i++) {
310  input_1.elements[i] = i;
311  }
312 
313  CS2::Input<kCount2> input_2;
314  input_2.count = kCount2;
315  for (size_t i = 0; i < kCount2; i++) {
316  input_2.elements[i] = i;
317  }
318 
319  auto output_buffer_1 = CreateHostVisibleDeviceBuffer<CS1::Output<kCount2>>(
320  context, "Output Buffer Stage 1");
321  auto output_buffer_2 = CreateHostVisibleDeviceBuffer<CS2::Output<kCount2>>(
322  context, "Output Buffer Stage 2");
323 
324  {
325  pass->SetPipeline(compute_pipeline_1);
326 
327  CS1::BindInput(*pass, host_buffer->EmplaceStorageBuffer(input_1));
328  CS1::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer_1));
329 
330  ASSERT_TRUE(pass->Compute(ISize(512, 1)).ok());
331  pass->AddBufferMemoryBarrier();
332  }
333 
334  {
335  pass->SetPipeline(compute_pipeline_2);
336 
337  CS1::BindInput(*pass, DeviceBuffer::AsBufferView(output_buffer_1));
338  CS2::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer_2));
339  ASSERT_TRUE(pass->Compute(ISize(512, 1)).ok());
340  }
341 
342  ASSERT_TRUE(pass->EncodeCommands());
343 
344  fml::AutoResetWaitableEvent latch;
345  ASSERT_TRUE(
346  context->GetCommandQueue()
347  ->Submit({cmd_buffer},
348  [&latch, &output_buffer_1,
349  &output_buffer_2](CommandBuffer::Status status) {
350  EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
351 
352  CS1::Output<kCount2>* output_1 =
353  reinterpret_cast<CS1::Output<kCount2>*>(
354  output_buffer_1->OnGetContents());
355  EXPECT_TRUE(output_1);
356  EXPECT_EQ(output_1->count, 10u);
357  EXPECT_THAT(
358  output_1->elements,
359  ::testing::ElementsAre(0, 0, 2, 3, 4, 6, 6, 9, 8, 12));
360 
361  CS2::Output<kCount2>* output_2 =
362  reinterpret_cast<CS2::Output<kCount2>*>(
363  output_buffer_2->OnGetContents());
364  EXPECT_TRUE(output_2);
365  EXPECT_EQ(output_2->count, 10u);
366  EXPECT_THAT(output_2->elements,
367  ::testing::ElementsAre(0, 0, 4, 6, 8, 12, 12,
368  18, 16, 24));
369 
370  latch.Signal();
371  })
372  .ok());
373 
374  latch.Wait();
375 }
376 
377 TEST_P(ComputeTest, CanCompute1DimensionalData) {
378  using CS = SampleComputeShader;
379  auto context = GetContext();
380  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator(),
381  context->GetIdleWaiter());
382  ASSERT_TRUE(context);
383  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
384 
385  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
386  auto pipeline_desc =
387  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
388  ASSERT_TRUE(pipeline_desc.has_value());
389  auto compute_pipeline =
390  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
391  ASSERT_TRUE(compute_pipeline);
392 
393  auto cmd_buffer = context->CreateCommandBuffer();
394  auto pass = cmd_buffer->CreateComputePass();
395  ASSERT_TRUE(pass && pass->IsValid());
396 
397  static constexpr size_t kCount = 5;
398 
399  pass->SetPipeline(compute_pipeline);
400 
401  CS::Info info{.count = kCount};
402  CS::Input0<kCount> input_0;
403  CS::Input1<kCount> input_1;
404  for (size_t i = 0; i < kCount; i++) {
405  input_0.elements[i] = Vector4(2.0 + i, 3.0 + i, 4.0 + i, 5.0 * i);
406  input_1.elements[i] = Vector4(6.0, 7.0, 8.0, 9.0);
407  }
408 
409  input_0.fixed_array[1] = IPoint32(2, 2);
410  input_1.fixed_array[0] = UintPoint32(3, 3);
411  input_0.some_int = 5;
412  input_1.some_struct = CS::SomeStruct{.vf = Point(3, 4), .i = 42};
413 
414  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::Output<kCount>>(
415  context, "Output Buffer");
416 
417  CS::BindInfo(*pass, host_buffer->EmplaceUniform(info));
418  CS::BindInput0(*pass, host_buffer->EmplaceStorageBuffer(input_0));
419  CS::BindInput1(*pass, host_buffer->EmplaceStorageBuffer(input_1));
420  CS::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer));
421 
422  ASSERT_TRUE(pass->Compute(ISize(kCount, 1)).ok());
423  ASSERT_TRUE(pass->EncodeCommands());
424 
425  fml::AutoResetWaitableEvent latch;
426  ASSERT_TRUE(
427  context->GetCommandQueue()
428  ->Submit(
429  {cmd_buffer},
430  [&latch, output_buffer, &input_0,
431  &input_1](CommandBuffer::Status status) {
432  EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
433 
434  auto view = DeviceBuffer::AsBufferView(output_buffer);
435  EXPECT_EQ(view.GetRange().length, sizeof(CS::Output<kCount>));
436 
437  CS::Output<kCount>* output =
438  reinterpret_cast<CS::Output<kCount>*>(
439  output_buffer->OnGetContents());
440  EXPECT_TRUE(output);
441  for (size_t i = 0; i < kCount; i++) {
442  Vector4 vector = output->elements[i];
443  Vector4 computed = input_0.elements[i] * input_1.elements[i];
444  EXPECT_EQ(vector,
445  Vector4(computed.x + 2 + input_1.some_struct.i,
446  computed.y + 3 + input_1.some_struct.vf.x,
447  computed.z + 5 + input_1.some_struct.vf.y,
448  computed.w));
449  }
450  latch.Signal();
451  })
452  .ok());
453 
454  latch.Wait();
455 }
456 
457 TEST_P(ComputeTest, ReturnsEarlyWhenAnyGridDimensionIsZero) {
458  using CS = SampleComputeShader;
459  auto context = GetContext();
460  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator(),
461  context->GetIdleWaiter());
462  ASSERT_TRUE(context);
463  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
464 
465  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
466  auto pipeline_desc =
467  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
468  ASSERT_TRUE(pipeline_desc.has_value());
469  auto compute_pipeline =
470  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
471  ASSERT_TRUE(compute_pipeline);
472 
473  auto cmd_buffer = context->CreateCommandBuffer();
474  auto pass = cmd_buffer->CreateComputePass();
475  ASSERT_TRUE(pass && pass->IsValid());
476 
477  static constexpr size_t kCount = 5;
478 
479  pass->SetPipeline(compute_pipeline);
480 
481  CS::Info info{.count = kCount};
482  CS::Input0<kCount> input_0;
483  CS::Input1<kCount> input_1;
484  for (size_t i = 0; i < kCount; i++) {
485  input_0.elements[i] = Vector4(2.0 + i, 3.0 + i, 4.0 + i, 5.0 * i);
486  input_1.elements[i] = Vector4(6.0, 7.0, 8.0, 9.0);
487  }
488 
489  input_0.fixed_array[1] = IPoint32(2, 2);
490  input_1.fixed_array[0] = UintPoint32(3, 3);
491  input_0.some_int = 5;
492  input_1.some_struct = CS::SomeStruct{.vf = Point(3, 4), .i = 42};
493 
494  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::Output<kCount>>(
495  context, "Output Buffer");
496 
497  CS::BindInfo(*pass, host_buffer->EmplaceUniform(info));
498  CS::BindInput0(*pass, host_buffer->EmplaceStorageBuffer(input_0));
499  CS::BindInput1(*pass, host_buffer->EmplaceStorageBuffer(input_1));
500  CS::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer));
501 
502  // Intentionally making the grid size zero in one dimension. No GPU will
503  // tolerate this.
504  EXPECT_FALSE(pass->Compute(ISize(0, 1)).ok());
505  pass->EncodeCommands();
506 }
507 
508 } // namespace testing
509 } // namespace impeller
static BufferView AsBufferView(std::shared_ptr< DeviceBuffer > buffer)
Create a buffer view of this entire buffer.
static std::shared_ptr< HostBuffer > Create(const std::shared_ptr< Allocator > &allocator, const std::shared_ptr< const IdleWaiter > &idle_waiter, size_t minimum_uniform_alignment)
Definition: host_buffer.cc:21
Render passes encode render commands directed as one specific render target into an underlying comman...
Definition: render_pass.h:30
ScopedObject< Object > Create(CtorArgs &&... args)
Definition: object.h:161
TEST_P(ComputeTest, ReturnsEarlyWhenAnyGridDimensionIsZero)
TEST_P(AiksTest, DrawAtlasNoColor)
INSTANTIATE_COMPUTE_SUITE(ComputeTest)
TPoint< Scalar > Point
Definition: point.h:327
TPoint< int32_t > IPoint32
Definition: point.h:329
ISize64 ISize
Definition: size.h:162
TPoint< uint32_t > UintPoint32
Definition: point.h:330
An optional (but highly recommended) utility for creating pipelines from reflected shader information...