TritonClient.h
Go to the documentation of this file.
1 #ifndef NuSonic_Triton_TritonClient
2 #define NuSonic_Triton_TritonClient
3 
6 
7 #include <map>
8 #include <vector>
9 #include <string>
10 #include <exception>
11 #include <unordered_map>
12 
13 #include "grpc_client.h"
14 #include "grpc_service.pb.h"
15 
16 namespace lartriton {
17 
// Client-side wrapper around NVIDIA Triton's gRPC inference client
// (nvidia::inferenceserver::client::InferenceServerGrpcClient). It owns the
// Triton input/output handles, the request options (timeout, model name and
// version), a retry budget, and the batch size; dispatch() runs one
// start()+evaluate() cycle. NOTE(review): this view of the header is a
// Doxygen listing with some lines elided (the constructor and the
// input_/output_ member declarations referenced by input()/output()).
18 class TritonClient {
19 public:
// Snapshot of per-model statistics reported by the Triton server
// (inference::ModelStatistics); all *_time_ns_ fields are in nanoseconds,
// as the suffix indicates. Produced by summarizeServerStats() as the
// difference between two server-side status snapshots.
20  struct ServerSideStats {
21  uint64_t inference_count_;
22  uint64_t execution_count_;
23  uint64_t success_count_;
// cumulative time spent on requests (cumm = cumulative)
24  uint64_t cumm_time_ns_;
25  uint64_t queue_time_ns_;
// NOTE(review): additional timing fields are elided from this excerpt.
29  };
30 
31  //constructor
// NOTE(review): constructor declaration elided here; the cross-reference
// index shows TritonClient(const fhicl::ParameterSet& params).
33 
34  //accessors
// Mutable access to the input map so callers can fill input data before
// dispatch(); input_ is declared in an elided portion of this class.
35  TritonInputMap& input() { return input_; }
// Read-only access to the outputs populated by a completed request.
36  const TritonOutputMap& output() const { return output_; }
37  unsigned batchSize() const { return batchSize_; }
38  bool verbose() const { return verbose_; }
// Returns whether the requested batch size was accepted — presumably it is
// rejected when bsize exceeds maxBatchSize_; TODO confirm in TritonClient.cc.
39  bool setBatchSize(unsigned bsize);
40 
41  //main operation
// Runs one complete inference cycle: start() issues the request,
// evaluate() collects and processes the results.
42  void dispatch() {
43  start();
44  evaluate();
45  }
46 
47  //helper
// Resets client state between requests (exact scope defined in the .cc).
48  void reset();
49 
50 protected:
51  //helper
// Extracts output data from a completed Triton request; returns success.
52  bool getResults(std::shared_ptr<nvidia::inferenceserver::client::InferResult> results);
53 
// Issues the inference request (called by dispatch()).
54  void start();
// Waits for / processes the response (called by dispatch()).
55  void evaluate();
// Finalizes a request attempt; success controls retry/cleanup behavior.
56  void finish(bool success);
57 
// Prints/records the given server-side statistics (used when verbose_).
58  void reportServerSideStats(const ServerSideStats& stats) const;
// Computes per-request stats as the delta between two server snapshots
// taken before and after the request.
59  ServerSideStats summarizeServerStats(const inference::ModelStatistics& start_status,
60  const inference::ModelStatistics& end_status) const;
61 
// Queries the server for the current statistics of this model.
62  inference::ModelStatistics getServerSideStatus() const;
63 
64  //members
// NOTE(review): input_/output_ map members (lines 65-66) and line 68 are
// elided from this excerpt; see input()/output() accessors above.
// Retry budget: allowedTries_ is the configured maximum, tries_ the count
// used so far for the current request.
67  unsigned allowedTries_, tries_;
// Largest batch size the loaded model accepts.
69  unsigned maxBatchSize_;
70  unsigned batchSize_;
// Presumably set when the model does not support batching — TODO confirm.
71  bool noBatch_;
72  bool verbose_;
73 
74  //IO pointers for triton
// Non-owning pointers handed to the Triton client API for each request.
75  std::vector<nvidia::inferenceserver::client::InferInput*> inputsTriton_;
76  std::vector<const nvidia::inferenceserver::client::InferRequestedOutput*> outputsTriton_;
77 
// Owned gRPC connection to the Triton server.
78  std::unique_ptr<nvidia::inferenceserver::client::InferenceServerGrpcClient> client_;
79  //stores timeout, model name and version
80  nvidia::inferenceserver::client::InferOptions options_;
81 };
82 
83 }
84 #endif
std::vector< const nvidia::inferenceserver::client::InferRequestedOutput * > outputsTriton_
Definition: TritonClient.h:76
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
nvidia::inferenceserver::client::InferOptions options_
Definition: TritonClient.h:80
std::unordered_map< std::string, TritonOutputData > TritonOutputMap
Definition: TritonData.h:103
void reportServerSideStats(const ServerSideStats &stats) const
std::string string
Definition: nybbler.cc:12
const TritonOutputMap & output() const
Definition: TritonClient.h:36
std::unique_ptr< nvidia::inferenceserver::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:78
TritonInputMap & input()
Definition: TritonClient.h:35
bool getResults(std::shared_ptr< nvidia::inferenceserver::client::InferResult > results)
void finish(bool success)
TritonOutputMap output_
Definition: TritonClient.h:66
inference::ModelStatistics getServerSideStatus() const
unsigned batchSize() const
Definition: TritonClient.h:37
TritonInputMap input_
Definition: TritonClient.h:65
bool setBatchSize(unsigned bsize)
bool verbose() const
Definition: TritonClient.h:38
TritonClient(const fhicl::ParameterSet &params)
Definition: TritonClient.cc:27
std::vector< nvidia::inferenceserver::client::InferInput * > inputsTriton_
Definition: TritonClient.h:75
std::unordered_map< std::string, TritonInputData > TritonInputMap
Definition: TritonData.h:101