39 "TritonClient(): unable to create inference context");
44 options_.client_timeout_ = params.
get<
unsigned>(
"timeout") * 1e6;
47 inference::ModelConfigResponse modelConfigResponse;
49 "TritonClient(): unable to get model config");
50 inference::ModelConfig modelConfig(modelConfigResponse.config());
58 maxBatchSize_ =
std::max(1u, maxBatchSize_);
61 inference::ModelMetadataResponse modelMetadata;
63 "TritonClient(): unable to get model metadata");
66 const auto& nicInputs = modelMetadata.inputs();
67 const auto& nicOutputs = modelMetadata.outputs();
70 std::ostringstream
msg;
74 if (nicInputs.empty())
75 msg <<
"Model on server appears malformed (zero inputs)\n";
77 if (nicOutputs.empty())
78 msg <<
"Model on server appears malformed (zero outputs)\n";
86 std::ostringstream io_msg;
88 io_msg <<
"Model inputs: " 91 for (
const auto& nicInput : nicInputs) {
92 const auto& iname = nicInput.name();
93 auto [curr_itr, success] =
input_.try_emplace(iname, iname, nicInput,
noBatch_);
94 auto& curr_input = curr_itr->second;
97 io_msg <<
" " << iname <<
" (" << curr_input.dname() <<
", " << curr_input.byteSize()
103 const auto& v_outputs = params.
get<std::vector<std::string>>(
"outputs");
104 std::unordered_set<std::string> s_outputs(v_outputs.begin(), v_outputs.end());
108 io_msg <<
"Model outputs: " 111 for (
const auto& nicOutput : nicOutputs) {
112 const auto& oname = nicOutput.name();
113 if (!s_outputs.empty() and s_outputs.find(oname) == s_outputs.end())
115 auto [curr_itr, success] =
output_.try_emplace(oname, oname, nicOutput,
noBatch_);
116 auto& curr_output = curr_itr->second;
119 io_msg <<
" " << oname <<
" (" << curr_output.dname() <<
", " << curr_output.byteSize()
122 if (!s_outputs.empty())
123 s_outputs.erase(oname);
127 if (!s_outputs.empty())
136 std::ostringstream model_msg;
137 model_msg <<
"Model name: " <<
options_.model_name_ <<
"\n" 138 <<
"Model version: " <<
options_.model_version_ <<
"\n" 140 MF_LOG_INFO(
"TritonClient") << model_msg.str() << io_msg.str();