Flutter Impeller
gpu_tracer_vk.cc
Go to the documentation of this file.
1 // Copyright 2013 The Flutter Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
6 
7 #include <memory>
8 #include <optional>
9 #include <thread>
10 #include <utility>
11 
12 #include "fml/logging.h"
13 #include "fml/trace_event.h"
17 
18 namespace impeller {
19 
// Number of timestamp queries allocated in each query pool. It is also the
// cap on how many timestamps a single trace state may record.
static constexpr uint32_t kPoolSize{128u};
21 
22 GPUTracerVK::GPUTracerVK(std::weak_ptr<ContextVK> context,
23  bool enable_gpu_tracing)
24  : context_(std::move(context)) {
25  if (!enable_gpu_tracing) {
26  return;
27  }
28  timestamp_period_ = context_.lock()
29  ->GetDeviceHolder()
30  ->GetPhysicalDevice()
31  .getProperties()
32  .limits.timestampPeriod;
33  if (timestamp_period_ <= 0) {
34  // The device does not support timestamp queries.
35  return;
36  }
37 // Disable tracing in release mode.
38 #ifdef IMPELLER_DEBUG
39  enabled_ = true;
40 #endif // IMPELLER_DEBUG
41 }
42 
44  if (!enabled_) {
45  return;
46  }
47  Lock lock(trace_state_mutex_);
48  std::shared_ptr<CommandBuffer> buffer = context.CreateCommandBuffer();
49  CommandBufferVK& buffer_vk = CommandBufferVK::Cast(*buffer);
50 
51  for (auto i = 0u; i < kTraceStatesSize; i++) {
52  vk::QueryPoolCreateInfo info;
53  info.queryCount = kPoolSize;
54  info.queryType = vk::QueryType::eTimestamp;
55 
56  auto [status, pool] = context.GetDevice().createQueryPoolUnique(info);
57  if (status != vk::Result::eSuccess) {
58  VALIDATION_LOG << "Failed to create query pool.";
59  return;
60  }
61  trace_states_[i].query_pool = std::move(pool);
62  buffer_vk.GetCommandBuffer().resetQueryPool(
63  trace_states_[i].query_pool.get(), 0, kPoolSize);
64  }
65  if (!context.GetCommandQueue()->Submit({buffer}).ok()) {
66  VALIDATION_LOG << "Failed to reset query pool for trace events.";
67  enabled_ = false;
68  }
69 }
70 
71 bool GPUTracerVK::IsEnabled() const {
72  return enabled_;
73 }
74 
76  if (!enabled_) {
77  return;
78  }
79  FML_DCHECK(!in_frame_);
80  in_frame_ = true;
81  raster_thread_id_ = std::this_thread::get_id();
82 }
83 
85  in_frame_ = false;
86 
87  if (!enabled_) {
88  return;
89  }
90 
91  Lock lock(trace_state_mutex_);
92  current_state_ = (current_state_ + 1) % kTraceStatesSize;
93 
94  auto& state = trace_states_[current_state_];
95  // If there are still pending buffers on the trace state we're switching to,
96  // that means that a cmd buffer we were relying on to signal this likely
97  // never finished. This shouldn't happen unless there is a bug in the
98  // encoder logic. We set it to zero anyway to prevent a validation error
99  // from becoming a memory leak.
100  FML_DCHECK(state.pending_buffers == 0u);
101  state.pending_buffers = 0;
102  state.current_index = 0;
103 }
104 
105 std::unique_ptr<GPUProbe> GPUTracerVK::CreateGPUProbe() {
106  return std::make_unique<GPUProbe>(weak_from_this());
107 }
108 
109 void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer,
110  GPUProbe& probe) {
111  if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
112  !in_frame_) {
113  return;
114  }
115  Lock lock(trace_state_mutex_);
116  auto& state = trace_states_[current_state_];
117 
118  // Reset previously completed queries.
119  if (!states_to_reset_.empty()) {
120  for (auto i = 0u; i < states_to_reset_.size(); i++) {
121  buffer.resetQueryPool(trace_states_[states_to_reset_[i]].query_pool.get(),
122  0, kPoolSize);
123  }
124  states_to_reset_.clear();
125  }
126 
127  // We size the query pool to kPoolSize, but Flutter applications can create an
128  // unbounded amount of work per frame. If we encounter this, stop recording
129  // cmds.
130  if (state.current_index >= kPoolSize) {
131  return;
132  }
133 
134  buffer.writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe,
135  trace_states_[current_state_].query_pool.get(),
136  state.current_index);
137  state.current_index += 1;
138  probe.index_ = current_state_;
139  state.pending_buffers += 1;
140 }
141 
142 void GPUTracerVK::RecordCmdBufferEnd(const vk::CommandBuffer& buffer,
143  GPUProbe& probe) {
144  if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
145  !in_frame_ || !probe.index_.has_value()) {
146  return;
147  }
148  Lock lock(trace_state_mutex_);
149  GPUTraceState& state = trace_states_[current_state_];
150 
151  if (state.current_index >= kPoolSize) {
152  return;
153  }
154 
155  buffer.writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe,
156  state.query_pool.get(), state.current_index);
157 
158  state.current_index += 1;
159 }
160 
161 void GPUTracerVK::OnFenceComplete(size_t frame_index) {
162  if (!enabled_) {
163  return;
164  }
165 
166  size_t pending = 0;
167  size_t query_count = 0;
168  vk::QueryPool pool;
169  {
170  Lock lock(trace_state_mutex_);
171  GPUTraceState& state = trace_states_[frame_index];
172 
173  FML_DCHECK(state.pending_buffers > 0);
174  state.pending_buffers -= 1;
175  pending = state.pending_buffers;
176  query_count = state.current_index;
177  pool = state.query_pool.get();
178  }
179 
180  if (pending == 0) {
181  std::vector<uint64_t> bits(query_count);
182  std::shared_ptr<ContextVK> context = context_.lock();
183  if (!context) {
184  return;
185  }
186 
187  auto result = context->GetDevice().getQueryPoolResults(
188  pool, 0, query_count, query_count * sizeof(uint64_t), bits.data(),
189  sizeof(uint64_t), vk::QueryResultFlagBits::e64);
190  // This may return VK_NOT_READY if the query couldn't be completed, or if
191  // there are queries still pending. From local testing, this happens
192  // occassionally on very expensive frames. Its unclear if we can do anything
193  // about this, because by design this should only signal after all cmd
194  // buffers have signaled. Adding VK_QUERY_RESULT_WAIT_BIT to the flags
195  // passed to getQueryPoolResults seems like it would fix this, but actually
196  // seems to result in more stuck query errors. Better to just drop them and
197  // move on.
198  if (result == vk::Result::eSuccess) {
199  uint64_t smallest_timestamp = std::numeric_limits<uint64_t>::max();
200  uint64_t largest_timestamp = 0;
201  for (auto i = 0u; i < bits.size(); i++) {
202  smallest_timestamp = std::min(smallest_timestamp, bits[i]);
203  largest_timestamp = std::max(largest_timestamp, bits[i]);
204  }
205  auto gpu_ms =
206  (((largest_timestamp - smallest_timestamp) * timestamp_period_) /
207  1000000);
208  FML_TRACE_COUNTER("flutter", "GPUTracer",
209  reinterpret_cast<int64_t>(this), // Trace Counter ID
210  "FrameTimeMS", gpu_ms);
211  }
212 
213  // Record this query to be reset the next time a command is recorded.
214  Lock lock(trace_state_mutex_);
215  states_to_reset_.push_back(frame_index);
216  }
217 }
218 
219 GPUProbe::GPUProbe(const std::weak_ptr<GPUTracerVK>& tracer)
220  : tracer_(tracer) {}
221 
223  if (!index_.has_value()) {
224  return;
225  }
226  auto tracer = tracer_.lock();
227  if (!tracer) {
228  return;
229  }
230  tracer->OnFenceComplete(index_.value());
231 }
232 
233 void GPUProbe::RecordCmdBufferStart(const vk::CommandBuffer& buffer) {
234  auto tracer = tracer_.lock();
235  if (!tracer) {
236  return;
237  }
238  tracer->RecordCmdBufferStart(buffer, *this);
239 }
240 
241 void GPUProbe::RecordCmdBufferEnd(const vk::CommandBuffer& buffer) {
242  auto tracer = tracer_.lock();
243  if (!tracer) {
244  return;
245  }
246  tracer->RecordCmdBufferEnd(buffer, *this);
247 }
248 
249 } // namespace impeller
static CommandBufferVK & Cast(CommandBuffer &base)
Definition: backend_cast.h:13
vk::CommandBuffer GetCommandBuffer() const
Retrieve the native command buffer from this object.
const vk::Device & GetDevice() const
Definition: context_vk.cc:589
std::shared_ptr< CommandBuffer > CreateCommandBuffer() const override
Create a new command buffer. Command buffers can be used to encode graphics, blit,...
Definition: context_vk.cc:536
std::shared_ptr< CommandQueue > GetCommandQueue() const override
Return the graphics queue for submitting command buffers.
Definition: context_vk.cc:648
void RecordCmdBufferStart(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record start time.
GPUProbe(const std::weak_ptr< GPUTracerVK > &tracer)
void RecordCmdBufferEnd(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record end time.
void MarkFrameStart()
Signal the start of a frame workload.
void MarkFrameEnd()
Signal the end of a frame workload.
std::unique_ptr< GPUProbe > CreateGPUProbe()
Create a GPUProbe to trace the execution of a command buffer on the GPU.
void InitializeQueryPool(const ContextVK &context)
Initialize the set of query pools.
GPUTracerVK(std::weak_ptr< ContextVK > context, bool enable_gpu_tracing)
bool IsEnabled() const
static constexpr uint32_t kPoolSize
Definition: comparable.h:95
#define VALIDATION_LOG
Definition: validation.h:91