Flutter Impeller
gpu_tracer_vk.cc
Go to the documentation of this file.
1 // Copyright 2013 The Flutter Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
6 
7 #include <memory>
8 #include <optional>
9 #include <thread>
10 #include <utility>
11 
12 #include "fml/logging.h"
13 #include "fml/trace_event.h"
18 
19 #include "vulkan/vulkan.hpp"
20 
21 namespace impeller {
22 
// Number of timestamp queries held by each trace state's query pool. Once a
// frame has consumed this many queries, further command buffers in that frame
// are not traced.
static constexpr uint32_t kPoolSize{128u};
24 
25 GPUTracerVK::GPUTracerVK(std::weak_ptr<ContextVK> context,
26  bool enable_gpu_tracing)
27  : context_(std::move(context)) {
28  if (!enable_gpu_tracing) {
29  return;
30  }
31  timestamp_period_ = context_.lock()
32  ->GetDeviceHolder()
33  ->GetPhysicalDevice()
34  .getProperties()
35  .limits.timestampPeriod;
36  if (timestamp_period_ <= 0) {
37  // The device does not support timestamp queries.
38  return;
39  }
40 // Disable tracing in release mode.
41 #ifdef IMPELLER_DEBUG
42  enabled_ = true;
43 #endif // IMPELLER_DEBUG
44 }
45 
47  if (!enabled_) {
48  return;
49  }
50  Lock lock(trace_state_mutex_);
51  std::shared_ptr<CommandBuffer> buffer = context.CreateCommandBuffer();
52  CommandBufferVK& buffer_vk = CommandBufferVK::Cast(*buffer);
53 
54  for (auto i = 0u; i < kTraceStatesSize; i++) {
55  vk::QueryPoolCreateInfo info;
56  info.queryCount = kPoolSize;
57  info.queryType = vk::QueryType::eTimestamp;
58 
59  auto [status, pool] = context.GetDevice().createQueryPoolUnique(info);
60  if (status != vk::Result::eSuccess) {
61  VALIDATION_LOG << "Failed to create query pool.";
62  return;
63  }
64  trace_states_[i].query_pool = std::move(pool);
65  buffer_vk.GetEncoder()->GetCommandBuffer().resetQueryPool(
66  trace_states_[i].query_pool.get(), 0, kPoolSize);
67  }
68  if (!context.GetCommandQueue()->Submit({buffer}).ok()) {
69  VALIDATION_LOG << "Failed to reset query pool for trace events.";
70  enabled_ = false;
71  }
72 }
73 
74 bool GPUTracerVK::IsEnabled() const {
75  return enabled_;
76 }
77 
79  if (!enabled_) {
80  return;
81  }
82  FML_DCHECK(!in_frame_);
83  in_frame_ = true;
84  raster_thread_id_ = std::this_thread::get_id();
85 }
86 
88  in_frame_ = false;
89 
90  if (!enabled_) {
91  return;
92  }
93 
94  Lock lock(trace_state_mutex_);
95  current_state_ = (current_state_ + 1) % kTraceStatesSize;
96 
97  auto& state = trace_states_[current_state_];
98  // If there are still pending buffers on the trace state we're switching to,
99  // that means that a cmd buffer we were relying on to signal this likely
100  // never finished. This shouldn't happen unless there is a bug in the
101  // encoder logic. We set it to zero anyway to prevent a validation error
102  // from becoming a memory leak.
103  FML_DCHECK(state.pending_buffers == 0u);
104  state.pending_buffers = 0;
105  state.current_index = 0;
106 }
107 
108 std::unique_ptr<GPUProbe> GPUTracerVK::CreateGPUProbe() {
109  return std::make_unique<GPUProbe>(weak_from_this());
110 }
111 
112 void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer,
113  GPUProbe& probe) {
114  if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
115  !in_frame_) {
116  return;
117  }
118  Lock lock(trace_state_mutex_);
119  auto& state = trace_states_[current_state_];
120 
121  // Reset previously completed queries.
122  if (!states_to_reset_.empty()) {
123  for (auto i = 0u; i < states_to_reset_.size(); i++) {
124  buffer.resetQueryPool(trace_states_[states_to_reset_[i]].query_pool.get(),
125  0, kPoolSize);
126  }
127  states_to_reset_.clear();
128  }
129 
130  // We size the query pool to kPoolSize, but Flutter applications can create an
131  // unbounded amount of work per frame. If we encounter this, stop recording
132  // cmds.
133  if (state.current_index >= kPoolSize) {
134  return;
135  }
136 
137  buffer.writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe,
138  trace_states_[current_state_].query_pool.get(),
139  state.current_index);
140  state.current_index += 1;
141  probe.index_ = current_state_;
142  state.pending_buffers += 1;
143 }
144 
145 void GPUTracerVK::RecordCmdBufferEnd(const vk::CommandBuffer& buffer,
146  GPUProbe& probe) {
147  if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
148  !in_frame_ || !probe.index_.has_value()) {
149  return;
150  }
151  Lock lock(trace_state_mutex_);
152  GPUTraceState& state = trace_states_[current_state_];
153 
154  if (state.current_index >= kPoolSize) {
155  return;
156  }
157 
158  buffer.writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe,
159  state.query_pool.get(), state.current_index);
160 
161  state.current_index += 1;
162 }
163 
164 void GPUTracerVK::OnFenceComplete(size_t frame_index) {
165  if (!enabled_) {
166  return;
167  }
168 
169  size_t pending = 0;
170  size_t query_count = 0;
171  vk::QueryPool pool;
172  {
173  Lock lock(trace_state_mutex_);
174  GPUTraceState& state = trace_states_[frame_index];
175 
176  FML_DCHECK(state.pending_buffers > 0);
177  state.pending_buffers -= 1;
178  pending = state.pending_buffers;
179  query_count = state.current_index;
180  pool = state.query_pool.get();
181  }
182 
183  if (pending == 0) {
184  std::vector<uint64_t> bits(query_count);
185  std::shared_ptr<ContextVK> context = context_.lock();
186  if (!context) {
187  return;
188  }
189 
190  auto result = context->GetDevice().getQueryPoolResults(
191  pool, 0, query_count, query_count * sizeof(uint64_t), bits.data(),
192  sizeof(uint64_t), vk::QueryResultFlagBits::e64);
193  // This may return VK_NOT_READY if the query couldn't be completed, or if
194  // there are queries still pending. From local testing, this happens
195  // occassionally on very expensive frames. Its unclear if we can do anything
196  // about this, because by design this should only signal after all cmd
197  // buffers have signaled. Adding VK_QUERY_RESULT_WAIT_BIT to the flags
198  // passed to getQueryPoolResults seems like it would fix this, but actually
199  // seems to result in more stuck query errors. Better to just drop them and
200  // move on.
201  if (result == vk::Result::eSuccess) {
202  uint64_t smallest_timestamp = std::numeric_limits<uint64_t>::max();
203  uint64_t largest_timestamp = 0;
204  for (auto i = 0u; i < bits.size(); i++) {
205  smallest_timestamp = std::min(smallest_timestamp, bits[i]);
206  largest_timestamp = std::max(largest_timestamp, bits[i]);
207  }
208  auto gpu_ms =
209  (((largest_timestamp - smallest_timestamp) * timestamp_period_) /
210  1000000);
211  FML_TRACE_COUNTER("flutter", "GPUTracer",
212  reinterpret_cast<int64_t>(this), // Trace Counter ID
213  "FrameTimeMS", gpu_ms);
214  }
215 
216  // Record this query to be reset the next time a command is recorded.
217  Lock lock(trace_state_mutex_);
218  states_to_reset_.push_back(frame_index);
219  }
220 }
221 
222 GPUProbe::GPUProbe(const std::weak_ptr<GPUTracerVK>& tracer)
223  : tracer_(tracer) {}
224 
226  if (!index_.has_value()) {
227  return;
228  }
229  auto tracer = tracer_.lock();
230  if (!tracer) {
231  return;
232  }
233  tracer->OnFenceComplete(index_.value());
234 }
235 
236 void GPUProbe::RecordCmdBufferStart(const vk::CommandBuffer& buffer) {
237  auto tracer = tracer_.lock();
238  if (!tracer) {
239  return;
240  }
241  tracer->RecordCmdBufferStart(buffer, *this);
242 }
243 
244 void GPUProbe::RecordCmdBufferEnd(const vk::CommandBuffer& buffer) {
245  auto tracer = tracer_.lock();
246  if (!tracer) {
247  return;
248  }
249  tracer->RecordCmdBufferEnd(buffer, *this);
250 }
251 
252 } // namespace impeller
gpu_tracer_vk.h
impeller::GPUProbe::~GPUProbe
~GPUProbe()
Definition: gpu_tracer_vk.cc:225
impeller::GPUTracerVK::MarkFrameStart
void MarkFrameStart()
Signal the start of a frame workload.
Definition: gpu_tracer_vk.cc:78
impeller::ContextVK::GetCommandQueue
std::shared_ptr< CommandQueue > GetCommandQueue() const override
Return the graphics queue for submitting command buffers.
Definition: context_vk.cc:578
impeller::CommandBufferVK::GetEncoder
const std::shared_ptr< CommandEncoderVK > & GetEncoder()
Definition: command_buffer_vk.cc:45
impeller::GPUProbe::GPUProbe
GPUProbe(const std::weak_ptr< GPUTracerVK > &tracer)
Definition: gpu_tracer_vk.cc:222
command_encoder_vk.h
impeller::Lock
Definition: thread.h:75
impeller::GPUTracerVK::MarkFrameEnd
void MarkFrameEnd()
Signal the end of a frame workload.
Definition: gpu_tracer_vk.cc:87
impeller::ContextVK::CreateCommandBuffer
std::shared_ptr< CommandBuffer > CreateCommandBuffer() const override
Create a new command buffer. Command buffers can be used to encode graphics, blit,...
Definition: context_vk.cc:503
validation.h
impeller::GPUTracerVK::GPUTracerVK
GPUTracerVK(std::weak_ptr< ContextVK > context, bool enable_gpu_tracing)
Definition: gpu_tracer_vk.cc:25
command_buffer_vk.h
impeller::GPUTracerVK::InitializeQueryPool
void InitializeQueryPool(const ContextVK &context)
Initialize the set of query pools.
Definition: gpu_tracer_vk.cc:46
impeller::CommandBufferVK
Definition: command_buffer_vk.h:18
impeller::GPUProbe
Definition: gpu_tracer_vk.h:101
impeller::ContextVK
Definition: context_vk.h:42
VALIDATION_LOG
#define VALIDATION_LOG
Definition: validation.h:73
std
Definition: comparable.h:95
impeller::ContextVK::GetDevice
const vk::Device & GetDevice() const
Definition: context_vk.cc:514
impeller::BackendCast< CommandBufferVK, CommandBuffer >::Cast
static CommandBufferVK & Cast(CommandBuffer &base)
Definition: backend_cast.h:13
impeller::GPUProbe::RecordCmdBufferStart
void RecordCmdBufferStart(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record start time.
Definition: gpu_tracer_vk.cc:236
context_vk.h
impeller::kPoolSize
static constexpr uint32_t kPoolSize
Definition: gpu_tracer_vk.cc:23
impeller::GPUTracerVK::CreateGPUProbe
std::unique_ptr< GPUProbe > CreateGPUProbe()
Create a GPUProbe to trace the execution of a command buffer on the GPU.
Definition: gpu_tracer_vk.cc:108
impeller::GPUTracerVK::IsEnabled
bool IsEnabled() const
Definition: gpu_tracer_vk.cc:74
impeller::GPUProbe::RecordCmdBufferEnd
void RecordCmdBufferEnd(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record end time.
Definition: gpu_tracer_vk.cc:244
impeller
Definition: aiks_blur_unittests.cc:20