-/* Edge Impulse inferencing library
- * Copyright (c) 2020 EdgeImpulse Inc.
+/* The Clear BSD License
  *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
+ * Copyright (c) 2025 EdgeImpulse Inc.
+ * All rights reserved.
  *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted (subject to the limitations in the disclaimer
+ * below) provided that the following conditions are met:
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+ * THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #pragma once
@@ -31,32 +43,163 @@ namespace libeitrt
 {
 
 /**
- * @brief Creates and initializes an inference engine for TensorRT.
- * If the engine has already been created from the provided file path, then
- * the engine is loaded from disk.
- *
- * The engine is then persisted via the EiTrt object until it is deleted,
- * to provide for fastest inference with lowest overhead
+ * @brief Creates and initializes a context for building and running TensorRT models.
+ *
+ * The models generated (or managed) from this context are then persisted via the
+ * EiTrt object until it is deleted, to provide the fastest inference with the
+ * lowest overhead.
  *
  * WARNING: This function leaks: the handle cannot be deleted because of the
  * forward declaration. The fix is to define an interface (virtual class) that has
  * a virtual destructor and also the infer function (although this way is more
  * C friendly!). My bad... should have done that from the get-go.
  *
+ * @param debug enable debug output if true, disable otherwise.
+ * @return EiTrt* handle. The pointer is NULL on error.
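+ *
+ * Example (a minimal sketch; the variable name trt is illustrative):
+ * @code
+ * EiTrt* trt = libeitrt::create_EiTrt(false); // false = no debug output
+ * if (trt == NULL) {
+ *     // context creation failed
+ * }
+ * @endcode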
+ */
+EiTrt* create_EiTrt(bool debug);
+
+/**
+ * @brief Builds and initializes an inference engine for TensorRT.
+ * If the engine has already been created from the provided file path, then
+ * the engine is loaded from disk.
+ *
+ * The engine is then persisted via the EiTrt object until it is deleted,
+ * to provide the fastest inference with the lowest overhead.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id an index to associate with the model.
  * @param model_file_name Model file path.
  * Should have hash appended so that engines are regenerated when models change!
- * @return std::unique_ptr<EiTrt> EiTrt handle. Contained ptr is NULL if error
+ * @return true if building (or loading) the TensorRT model was successful.
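+ *
+ * Example (sketch; trt is a handle from create_EiTrt and "model.onnx" is a
+ * placeholder path):
+ * @code
+ * if (!libeitrt::build(trt, 0, "model.onnx")) {
+ *     // building (or loading) the engine failed
+ * }
+ * @endcode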
+ */
+bool build(EiTrt* ei_trt_handle, int model_id, const char* model_file_name);
+
+/**
+ * @brief Warms up the model on the GPU for warm_up_ms milliseconds.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @param warm_up_ms the duration, in ms, over which to loop and run inference.
+ * @return true if warming up the model was successful.
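+ *
+ * Example (sketch; trt is a handle from create_EiTrt, model 0 already built):
+ * @code
+ * libeitrt::warmUp(trt, 0, 200); // loop inference for ~200 ms
+ * @endcode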
+ */
+bool warmUp(EiTrt* ei_trt_handle, int model_id, int warm_up_ms);
+
+/**
+ * @brief Copies input from the CPU to the GPU for inference with model_id.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @param input a pointer to the (float) input buffer.
+ * @param size the number of bytes to copy from the input.
+ * @return true if copying the input was successful.
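+ *
+ * Example (sketch; trt/model 0 as above, features is a std::vector<float>
+ * holding getInputSize(trt, 0) values):
+ * @code
+ * libeitrt::copyInputToDevice(trt, 0, features.data(),
+ *                             (int)(features.size() * sizeof(float)));
+ * @endcode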
  */
-EiTrt* create_EiTrt(const char* model_file_name, bool debug);
+bool copyInputToDevice(EiTrt* ei_trt_handle, int model_id, float* input, int size);
 
 /**
  * @brief Perform inference
- *
- * @param ei_trt_handle Created handle to inference engine
- * @param[in] input Input features (buffer member of ei_matrix)
- * @param[out] output Buffer to write output to
- * @param output_size Buffer size
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
  * @return int 0 on success, <0 otherwise
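+ *
+ * Example (sketch; the input must already have been copied to the device):
+ * @code
+ * if (libeitrt::infer(trt, 0) != 0) {
+ *     // inference failed
+ * }
+ * @endcode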
  */
-int infer(EiTrt* ei_trt_handle, float* input, float* output, int output_size);
+int infer(EiTrt* ei_trt_handle, int model_id);
+
+/**
+ * @brief Copies output from the GPU to the CPU after inference with model_id.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @param output a pointer to the (float) output buffer.
+ * @param size the number of bytes to copy into the output buffer.
+ * @return true if copying the output was successful.
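+ *
+ * Example (sketch; results is a std::vector<float> sized to
+ * getOutputSize(trt, 0)):
+ * @code
+ * libeitrt::copyOutputToHost(trt, 0, results.data(),
+ *                            (int)(results.size() * sizeof(float)));
+ * @endcode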
+ */
+bool copyOutputToHost(EiTrt* ei_trt_handle, int model_id, float* output, int size);
+
+/**
+ * @brief Configures the maximum workspace size that may be allocated.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param size workspace size in bytes.
+ */
+void setMaxWorkspaceSize(EiTrt* ei_trt_handle, int size);
+
+/**
+ * @brief Returns the currently configured maximum workspace size.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @return the size of the workspace in bytes.
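+ *
+ * Example (sketch; 1 << 28 bytes = 256 MiB is an arbitrary illustrative value):
+ * @code
+ * libeitrt::setMaxWorkspaceSize(trt, 1 << 28);
+ * int workspace = libeitrt::getMaxWorkspaceSize(trt);
+ * @endcode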
+ */
+int getMaxWorkspaceSize(EiTrt* ei_trt_handle);
+
+/**
+ * @brief Returns the input size (in features) of model_id.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @return the input size (in features).
+ */
+int getInputSize(EiTrt* ei_trt_handle, int model_id);
+
+/**
+ * @brief Returns the output size (in features) of model_id.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @return the output size (in features).
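+ *
+ * Example (sketch; sizes are feature counts, not bytes):
+ * @code
+ * int n_in  = libeitrt::getInputSize(trt, 0);
+ * int n_out = libeitrt::getOutputSize(trt, 0);
+ * @endcode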
+ */
+int getOutputSize(EiTrt* ei_trt_handle, int model_id);
+
+/**
+ * @brief Returns the latest inference latency in ms for model with id
+ * (model_id) and context (ei_trt_handle).
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @return the inference time in ms.
+ */
+uint64_t getInferenceMs(EiTrt* ei_trt_handle, int model_id);
+
+/**
+ * @brief Returns the latest inference latency in us for model with id
+ * (model_id) and context (ei_trt_handle).
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @return the inference time in us.
+ */
+uint64_t getInferenceUs(EiTrt* ei_trt_handle, int model_id);
+
+/**
+ * @brief Returns the latest inference latency in ns for model with id
+ * (model_id) and context (ei_trt_handle).
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @param model_id a reference to the model to work on.
+ * @return the inference time in ns.
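+ *
+ * Example (sketch; call after a successful infer, assumes <cstdio> is included):
+ * @code
+ * printf("inference took %llu ms\n",
+ *        (unsigned long long)libeitrt::getInferenceMs(trt, 0));
+ * @endcode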
+ */
+uint64_t getInferenceNs(EiTrt* ei_trt_handle, int model_id);
+
+/**
+ * @brief Returns the current library major version.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @return the library's major version.
+ */
+int getMajorVersion(EiTrt* ei_trt_handle);
+
+/**
+ * @brief Returns the current library minor version.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @return the library's minor version.
+ */
+int getMinorVersion(EiTrt* ei_trt_handle);
+
+/**
+ * @brief Returns the current library patch version.
+ *
+ * @param ei_trt_handle EI TensorRT context.
+ * @return the library's patch version.
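+ *
+ * Example (sketch; assumes <cstdio> is included):
+ * @code
+ * printf("libeitrt v%d.%d.%d\n",
+ *        libeitrt::getMajorVersion(trt),
+ *        libeitrt::getMinorVersion(trt),
+ *        libeitrt::getPatchVersion(trt));
+ * @endcode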
+ */
+int getPatchVersion(EiTrt* ei_trt_handle);
 }