Flutter Impeller
compute_unittests.cc
Go to the documentation of this file.
1 // Copyright 2013 The Flutter Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "flutter/fml/synchronization/waitable_event.h"
6 #include "flutter/testing/testing.h"
7 #include "gmock/gmock.h"
9 #include "impeller/fixtures/sample.comp.h"
10 #include "impeller/fixtures/stage1.comp.h"
11 #include "impeller/fixtures/stage2.comp.h"
16 #include "impeller/renderer/prefix_sum_test.comp.h"
17 #include "impeller/renderer/threadgroup_sizing_test.comp.h"
18 
19 namespace impeller {
20 namespace testing {
23 
24 TEST_P(ComputeTest, CapabilitiesReportSupport) {
25  auto context = GetContext();
26  ASSERT_TRUE(context);
27  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
28 }
29 
30 TEST_P(ComputeTest, CanCreateComputePass) {
31  using CS = SampleComputeShader;
32  auto context = GetContext();
33  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator());
34  ASSERT_TRUE(context);
35  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
36 
37  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
38  auto pipeline_desc =
39  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
40  ASSERT_TRUE(pipeline_desc.has_value());
41  auto compute_pipeline =
42  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
43  ASSERT_TRUE(compute_pipeline);
44 
45  auto cmd_buffer = context->CreateCommandBuffer();
46  auto pass = cmd_buffer->CreateComputePass();
47  ASSERT_TRUE(pass && pass->IsValid());
48 
49  static constexpr size_t kCount = 5;
50 
51  pass->SetPipeline(compute_pipeline);
52 
53  CS::Info info{.count = kCount};
54  CS::Input0<kCount> input_0;
55  CS::Input1<kCount> input_1;
56  for (size_t i = 0; i < kCount; i++) {
57  input_0.elements[i] = Vector4(2.0 + i, 3.0 + i, 4.0 + i, 5.0 * i);
58  input_1.elements[i] = Vector4(6.0, 7.0, 8.0, 9.0);
59  }
60 
61  input_0.fixed_array[1] = IPoint32(2, 2);
62  input_1.fixed_array[0] = UintPoint32(3, 3);
63  input_0.some_int = 5;
64  input_1.some_struct = CS::SomeStruct{.vf = Point(3, 4), .i = 42};
65 
66  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::Output<kCount>>(
67  context, "Output Buffer");
68 
69  CS::BindInfo(*pass, host_buffer->EmplaceUniform(info));
70  CS::BindInput0(*pass, host_buffer->EmplaceStorageBuffer(input_0));
71  CS::BindInput1(*pass, host_buffer->EmplaceStorageBuffer(input_1));
72  CS::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer));
73 
74  ASSERT_TRUE(pass->Compute(ISize(kCount, 1)).ok());
75  ASSERT_TRUE(pass->EncodeCommands());
76 
77  fml::AutoResetWaitableEvent latch;
78  ASSERT_TRUE(
79  context->GetCommandQueue()
80  ->Submit(
81  {cmd_buffer},
82  [&latch, output_buffer, &input_0,
83  &input_1](CommandBuffer::Status status) {
84  EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
85 
86  auto view = DeviceBuffer::AsBufferView(output_buffer);
87  EXPECT_EQ(view.range.length, sizeof(CS::Output<kCount>));
88 
89  CS::Output<kCount>* output =
90  reinterpret_cast<CS::Output<kCount>*>(
91  output_buffer->OnGetContents());
92  EXPECT_TRUE(output);
93  for (size_t i = 0; i < kCount; i++) {
94  Vector4 vector = output->elements[i];
95  Vector4 computed = input_0.elements[i] * input_1.elements[i];
96  EXPECT_EQ(vector,
97  Vector4(computed.x + 2 + input_1.some_struct.i,
98  computed.y + 3 + input_1.some_struct.vf.x,
99  computed.z + 5 + input_1.some_struct.vf.y,
100  computed.w));
101  }
102  latch.Signal();
103  })
104  .ok());
105 
106  latch.Wait();
107 }
108 
109 TEST_P(ComputeTest, CanComputePrefixSum) {
110  using CS = PrefixSumTestComputeShader;
111  auto context = GetContext();
112  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator());
113  ASSERT_TRUE(context);
114  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
115 
116  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
117  auto pipeline_desc =
118  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
119  ASSERT_TRUE(pipeline_desc.has_value());
120  auto compute_pipeline =
121  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
122  ASSERT_TRUE(compute_pipeline);
123 
124  auto cmd_buffer = context->CreateCommandBuffer();
125  auto pass = cmd_buffer->CreateComputePass();
126  ASSERT_TRUE(pass && pass->IsValid());
127 
128  static constexpr size_t kCount = 5;
129 
130  pass->SetPipeline(compute_pipeline);
131 
132  CS::InputData<kCount> input_data;
133  input_data.count = kCount;
134  for (size_t i = 0; i < kCount; i++) {
135  input_data.data[i] = 1 + i;
136  }
137 
138  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
139  context, "Output Buffer");
140 
141  CS::BindInputData(*pass, host_buffer->EmplaceStorageBuffer(input_data));
142  CS::BindOutputData(*pass, DeviceBuffer::AsBufferView(output_buffer));
143 
144  ASSERT_TRUE(pass->Compute(ISize(kCount, 1)).ok());
145  ASSERT_TRUE(pass->EncodeCommands());
146 
147  fml::AutoResetWaitableEvent latch;
148  ASSERT_TRUE(
149  context->GetCommandQueue()
150  ->Submit({cmd_buffer},
151  [&latch, output_buffer](CommandBuffer::Status status) {
152  EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
153 
154  auto view = DeviceBuffer::AsBufferView(output_buffer);
155  EXPECT_EQ(view.range.length,
156  sizeof(CS::OutputData<kCount>));
157 
158  CS::OutputData<kCount>* output =
159  reinterpret_cast<CS::OutputData<kCount>*>(
160  output_buffer->OnGetContents());
161  EXPECT_TRUE(output);
162 
163  constexpr uint32_t expected[kCount] = {1, 3, 6, 10, 15};
164  for (size_t i = 0; i < kCount; i++) {
165  auto computed_sum = output->data[i];
166  EXPECT_EQ(computed_sum, expected[i]);
167  }
168  latch.Signal();
169  })
170  .ok());
171 
172  latch.Wait();
173 }
174 
175 TEST_P(ComputeTest, 1DThreadgroupSizingIsCorrect) {
176  using CS = ThreadgroupSizingTestComputeShader;
177  auto context = GetContext();
178  ASSERT_TRUE(context);
179  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
180 
181  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
182  auto pipeline_desc =
183  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
184  ASSERT_TRUE(pipeline_desc.has_value());
185  auto compute_pipeline =
186  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
187  ASSERT_TRUE(compute_pipeline);
188 
189  auto cmd_buffer = context->CreateCommandBuffer();
190  auto pass = cmd_buffer->CreateComputePass();
191  ASSERT_TRUE(pass && pass->IsValid());
192 
193  static constexpr size_t kCount = 2048;
194 
195  pass->SetPipeline(compute_pipeline);
196 
197  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
198  context, "Output Buffer");
199 
200  CS::BindOutputData(*pass, DeviceBuffer::AsBufferView(output_buffer));
201 
202  ASSERT_TRUE(pass->Compute(ISize(kCount, 1)).ok());
203  ASSERT_TRUE(pass->EncodeCommands());
204 
205  fml::AutoResetWaitableEvent latch;
206  ASSERT_TRUE(
207  context->GetCommandQueue()
208  ->Submit({cmd_buffer},
209  [&latch, output_buffer](CommandBuffer::Status status) {
210  EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
211 
212  auto view = DeviceBuffer::AsBufferView(output_buffer);
213  EXPECT_EQ(view.range.length,
214  sizeof(CS::OutputData<kCount>));
215 
216  CS::OutputData<kCount>* output =
217  reinterpret_cast<CS::OutputData<kCount>*>(
218  output_buffer->OnGetContents());
219  EXPECT_TRUE(output);
220  EXPECT_EQ(output->data[kCount - 1], kCount - 1);
221  latch.Signal();
222  })
223  .ok());
224 
225  latch.Wait();
226 }
227 
228 TEST_P(ComputeTest, CanComputePrefixSumLargeInteractive) {
229  using CS = PrefixSumTestComputeShader;
230 
231  auto context = GetContext();
232  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator());
233 
234  ASSERT_TRUE(context);
235  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
236 
237  auto callback = [&](RenderPass& render_pass) -> bool {
238  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
239  auto pipeline_desc =
240  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
241  auto compute_pipeline =
242  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
243 
244  auto cmd_buffer = context->CreateCommandBuffer();
245  auto pass = cmd_buffer->CreateComputePass();
246 
247  static constexpr size_t kCount = 1023;
248 
249  pass->SetPipeline(compute_pipeline);
250 
251  CS::InputData<kCount> input_data;
252  input_data.count = kCount;
253  for (size_t i = 0; i < kCount; i++) {
254  input_data.data[i] = 1 + i;
255  }
256 
257  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
258  context, "Output Buffer");
259 
260  CS::BindInputData(*pass, host_buffer->EmplaceStorageBuffer(input_data));
261  CS::BindOutputData(*pass, DeviceBuffer::AsBufferView(output_buffer));
262 
263  pass->Compute(ISize(kCount, 1));
264  pass->EncodeCommands();
265  host_buffer->Reset();
266  return context->GetCommandQueue()->Submit({cmd_buffer}).ok();
267  };
268  ASSERT_TRUE(OpenPlaygroundHere(callback));
269 }
270 
271 TEST_P(ComputeTest, MultiStageInputAndOutput) {
272  using CS1 = Stage1ComputeShader;
273  using Stage1PipelineBuilder = ComputePipelineBuilder<CS1>;
274  using CS2 = Stage2ComputeShader;
275  using Stage2PipelineBuilder = ComputePipelineBuilder<CS2>;
276 
277  auto context = GetContext();
278  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator());
279  ASSERT_TRUE(context);
280  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
281 
282  auto pipeline_desc_1 =
283  Stage1PipelineBuilder::MakeDefaultPipelineDescriptor(*context);
284  ASSERT_TRUE(pipeline_desc_1.has_value());
285  auto compute_pipeline_1 =
286  context->GetPipelineLibrary()->GetPipeline(pipeline_desc_1).Get();
287  ASSERT_TRUE(compute_pipeline_1);
288 
289  auto pipeline_desc_2 =
290  Stage2PipelineBuilder::MakeDefaultPipelineDescriptor(*context);
291  ASSERT_TRUE(pipeline_desc_2.has_value());
292  auto compute_pipeline_2 =
293  context->GetPipelineLibrary()->GetPipeline(pipeline_desc_2).Get();
294  ASSERT_TRUE(compute_pipeline_2);
295 
296  auto cmd_buffer = context->CreateCommandBuffer();
297  auto pass = cmd_buffer->CreateComputePass();
298  ASSERT_TRUE(pass && pass->IsValid());
299 
300  static constexpr size_t kCount1 = 5;
301  static constexpr size_t kCount2 = kCount1 * 2;
302 
303  CS1::Input<kCount1> input_1;
304  input_1.count = kCount1;
305  for (size_t i = 0; i < kCount1; i++) {
306  input_1.elements[i] = i;
307  }
308 
309  CS2::Input<kCount2> input_2;
310  input_2.count = kCount2;
311  for (size_t i = 0; i < kCount2; i++) {
312  input_2.elements[i] = i;
313  }
314 
315  auto output_buffer_1 = CreateHostVisibleDeviceBuffer<CS1::Output<kCount2>>(
316  context, "Output Buffer Stage 1");
317  auto output_buffer_2 = CreateHostVisibleDeviceBuffer<CS2::Output<kCount2>>(
318  context, "Output Buffer Stage 2");
319 
320  {
321  pass->SetPipeline(compute_pipeline_1);
322 
323  CS1::BindInput(*pass, host_buffer->EmplaceStorageBuffer(input_1));
324  CS1::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer_1));
325 
326  ASSERT_TRUE(pass->Compute(ISize(512, 1)).ok());
327  pass->AddBufferMemoryBarrier();
328  }
329 
330  {
331  pass->SetPipeline(compute_pipeline_2);
332 
333  CS1::BindInput(*pass, DeviceBuffer::AsBufferView(output_buffer_1));
334  CS2::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer_2));
335  ASSERT_TRUE(pass->Compute(ISize(512, 1)).ok());
336  }
337 
338  ASSERT_TRUE(pass->EncodeCommands());
339 
340  fml::AutoResetWaitableEvent latch;
341  ASSERT_TRUE(
342  context->GetCommandQueue()
343  ->Submit({cmd_buffer},
344  [&latch, &output_buffer_1,
345  &output_buffer_2](CommandBuffer::Status status) {
346  EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
347 
348  CS1::Output<kCount2>* output_1 =
349  reinterpret_cast<CS1::Output<kCount2>*>(
350  output_buffer_1->OnGetContents());
351  EXPECT_TRUE(output_1);
352  EXPECT_EQ(output_1->count, 10u);
353  EXPECT_THAT(
354  output_1->elements,
355  ::testing::ElementsAre(0, 0, 2, 3, 4, 6, 6, 9, 8, 12));
356 
357  CS2::Output<kCount2>* output_2 =
358  reinterpret_cast<CS2::Output<kCount2>*>(
359  output_buffer_2->OnGetContents());
360  EXPECT_TRUE(output_2);
361  EXPECT_EQ(output_2->count, 10u);
362  EXPECT_THAT(output_2->elements,
363  ::testing::ElementsAre(0, 0, 4, 6, 8, 12, 12,
364  18, 16, 24));
365 
366  latch.Signal();
367  })
368  .ok());
369 
370  latch.Wait();
371 }
372 
373 TEST_P(ComputeTest, CanCompute1DimensionalData) {
374  using CS = SampleComputeShader;
375  auto context = GetContext();
376  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator());
377  ASSERT_TRUE(context);
378  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
379 
380  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
381  auto pipeline_desc =
382  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
383  ASSERT_TRUE(pipeline_desc.has_value());
384  auto compute_pipeline =
385  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
386  ASSERT_TRUE(compute_pipeline);
387 
388  auto cmd_buffer = context->CreateCommandBuffer();
389  auto pass = cmd_buffer->CreateComputePass();
390  ASSERT_TRUE(pass && pass->IsValid());
391 
392  static constexpr size_t kCount = 5;
393 
394  pass->SetPipeline(compute_pipeline);
395 
396  CS::Info info{.count = kCount};
397  CS::Input0<kCount> input_0;
398  CS::Input1<kCount> input_1;
399  for (size_t i = 0; i < kCount; i++) {
400  input_0.elements[i] = Vector4(2.0 + i, 3.0 + i, 4.0 + i, 5.0 * i);
401  input_1.elements[i] = Vector4(6.0, 7.0, 8.0, 9.0);
402  }
403 
404  input_0.fixed_array[1] = IPoint32(2, 2);
405  input_1.fixed_array[0] = UintPoint32(3, 3);
406  input_0.some_int = 5;
407  input_1.some_struct = CS::SomeStruct{.vf = Point(3, 4), .i = 42};
408 
409  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::Output<kCount>>(
410  context, "Output Buffer");
411 
412  CS::BindInfo(*pass, host_buffer->EmplaceUniform(info));
413  CS::BindInput0(*pass, host_buffer->EmplaceStorageBuffer(input_0));
414  CS::BindInput1(*pass, host_buffer->EmplaceStorageBuffer(input_1));
415  CS::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer));
416 
417  ASSERT_TRUE(pass->Compute(ISize(kCount, 1)).ok());
418  ASSERT_TRUE(pass->EncodeCommands());
419 
420  fml::AutoResetWaitableEvent latch;
421  ASSERT_TRUE(
422  context->GetCommandQueue()
423  ->Submit(
424  {cmd_buffer},
425  [&latch, output_buffer, &input_0,
426  &input_1](CommandBuffer::Status status) {
427  EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
428 
429  auto view = DeviceBuffer::AsBufferView(output_buffer);
430  EXPECT_EQ(view.range.length, sizeof(CS::Output<kCount>));
431 
432  CS::Output<kCount>* output =
433  reinterpret_cast<CS::Output<kCount>*>(
434  output_buffer->OnGetContents());
435  EXPECT_TRUE(output);
436  for (size_t i = 0; i < kCount; i++) {
437  Vector4 vector = output->elements[i];
438  Vector4 computed = input_0.elements[i] * input_1.elements[i];
439  EXPECT_EQ(vector,
440  Vector4(computed.x + 2 + input_1.some_struct.i,
441  computed.y + 3 + input_1.some_struct.vf.x,
442  computed.z + 5 + input_1.some_struct.vf.y,
443  computed.w));
444  }
445  latch.Signal();
446  })
447  .ok());
448 
449  latch.Wait();
450 }
451 
452 TEST_P(ComputeTest, ReturnsEarlyWhenAnyGridDimensionIsZero) {
453  using CS = SampleComputeShader;
454  auto context = GetContext();
455  auto host_buffer = HostBuffer::Create(context->GetResourceAllocator());
456  ASSERT_TRUE(context);
457  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
458 
459  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
460  auto pipeline_desc =
461  SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
462  ASSERT_TRUE(pipeline_desc.has_value());
463  auto compute_pipeline =
464  context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
465  ASSERT_TRUE(compute_pipeline);
466 
467  auto cmd_buffer = context->CreateCommandBuffer();
468  auto pass = cmd_buffer->CreateComputePass();
469  ASSERT_TRUE(pass && pass->IsValid());
470 
471  static constexpr size_t kCount = 5;
472 
473  pass->SetPipeline(compute_pipeline);
474 
475  CS::Info info{.count = kCount};
476  CS::Input0<kCount> input_0;
477  CS::Input1<kCount> input_1;
478  for (size_t i = 0; i < kCount; i++) {
479  input_0.elements[i] = Vector4(2.0 + i, 3.0 + i, 4.0 + i, 5.0 * i);
480  input_1.elements[i] = Vector4(6.0, 7.0, 8.0, 9.0);
481  }
482 
483  input_0.fixed_array[1] = IPoint32(2, 2);
484  input_1.fixed_array[0] = UintPoint32(3, 3);
485  input_0.some_int = 5;
486  input_1.some_struct = CS::SomeStruct{.vf = Point(3, 4), .i = 42};
487 
488  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::Output<kCount>>(
489  context, "Output Buffer");
490 
491  CS::BindInfo(*pass, host_buffer->EmplaceUniform(info));
492  CS::BindInput0(*pass, host_buffer->EmplaceStorageBuffer(input_0));
493  CS::BindInput1(*pass, host_buffer->EmplaceStorageBuffer(input_1));
494  CS::BindOutput(*pass, DeviceBuffer::AsBufferView(output_buffer));
495 
496  // Intentionally making the grid size zero in one dimension. No GPU will
497  // tolerate this.
498  EXPECT_FALSE(pass->Compute(ISize(0, 1)).ok());
499  pass->EncodeCommands();
500 }
501 
502 } // namespace testing
503 } // namespace impeller
impeller::DeviceBuffer::AsBufferView
static BufferView AsBufferView(std::shared_ptr< DeviceBuffer > buffer)
Create a buffer view of this entire buffer.
Definition: device_buffer.cc:18
host_buffer.h
impeller::testing::TEST_P
TEST_P(ComputeTest, ReturnsEarlyWhenAnyGridDimensionIsZero)
Definition: compute_unittests.cc:452
impeller::Vector4
Definition: vector.h:232
impeller::HostBuffer::Create
static std::shared_ptr< HostBuffer > Create(const std::shared_ptr< Allocator > &allocator)
Definition: host_buffer.cc:20
impeller::UintPoint32
TPoint< uint32_t > UintPoint32
Definition: point.h:319
impeller::ComputePlaygroundTest
Definition: compute_playground_test.h:19
impeller::TSize< int64_t >
impeller::Point
TPoint< Scalar > Point
Definition: point.h:316
impeller::IPoint32
TPoint< int32_t > IPoint32
Definition: point.h:318
compute_pipeline_builder.h
impeller::testing::INSTANTIATE_COMPUTE_SUITE
INSTANTIATE_COMPUTE_SUITE(ComputeSubgroupTest)
pipeline_library.h
impeller::ISize
TSize< int64_t > ISize
Definition: size.h:138
impeller::RenderPass
Render passes encode render commands directed as one specific render target into an underlying comman...
Definition: render_pass.h:33
command_buffer.h
impeller::testing::TEST_P
TEST_P(AiksTest, CanRenderMaskBlurHugeSigma)
Definition: aiks_blur_unittests.cc:23
compute_playground_test.h
impeller::SampleCount::kCount1
@ kCount1
impeller::CommandBuffer::Status
Status
Definition: command_buffer.h:49
impeller
Definition: aiks_blur_unittests.cc:20
impeller::ComputePipelineBuilder
An optional (but highly recommended) utility for creating pipelines from reflected shader information...
Definition: compute_pipeline_builder.h:25