@@ -181,6 +181,7 @@ static int accelerator_rocm_component_register(void)
181181
182182int opal_accelerator_rocm_lazy_init ()
183183{
184+ hipError_t hip_err ;
184185 int err = OPAL_SUCCESS ;
185186
186187 /* Double checked locking to avoid having to
@@ -196,41 +197,94 @@ int opal_accelerator_rocm_lazy_init()
196197 goto out ;
197198 }
198199
199- err = hipGetDeviceCount (& opal_accelerator_rocm_num_devices );
200- if (hipSuccess != err ) {
200+ hip_err = hipGetDeviceCount (& opal_accelerator_rocm_num_devices );
201+ if (hipSuccess != hip_err ) {
201202 opal_output (0 , "Failed to query device count, err=%d %s\n" ,
202- err , hipGetErrorString (err ));
203- err = OPAL_ERROR ;
203+ hip_err , hipGetErrorString (hip_err ));
204+ err = OPAL_ERROR ;
204205 goto out ;
205206 }
206207
207208 hipStream_t memcpy_stream ;
208- err = hipStreamCreate (& memcpy_stream );
209- if (hipSuccess != err ) {
209+ hip_err = hipStreamCreate (& memcpy_stream );
210+ if (hipSuccess != hip_err ) {
210211 opal_output (0 , "Could not create hipStream, err=%d %s\n" ,
211- err , hipGetErrorString (err ));
212- err = OPAL_ERROR ; // we got hipErrorInvalidValue, pretty bad
212+ hip_err , hipGetErrorString (hip_err ));
213+ err = OPAL_ERROR ; // we got hipErrorInvalidValue, pretty bad
213214 goto out ;
214215 }
216+
215217 opal_accelerator_rocm_MemcpyStream = malloc (sizeof (hipStream_t ));
218+ if (NULL == opal_accelerator_rocm_MemcpyStream ) {
219+ opal_output (0 , "Could not allocate hipStream\n" );
220+ err = OPAL_ERR_OUT_OF_RESOURCE ;
221+ goto out ;
222+ }
216223 * opal_accelerator_rocm_MemcpyStream = memcpy_stream ;
217224
218225 opal_accelerator_rocm_mem_bw = malloc (sizeof (float )* opal_accelerator_rocm_num_devices );
226+ if (NULL == opal_accelerator_rocm_mem_bw ) {
227+ opal_output (0 , "Could not allocate memory_bw array\n" );
228+ err = OPAL_ERR_OUT_OF_RESOURCE ;
229+ goto out ;
230+ }
231+
219232 for (int i = 0 ; i < opal_accelerator_rocm_num_devices ; ++ i ) {
220233 int mem_clock_rate ; // kHz
221- err = hipDeviceGetAttribute (& mem_clock_rate ,
222- hipDeviceAttributeMemoryClockRate ,
223- i );
234+ hip_err = hipDeviceGetAttribute (& mem_clock_rate ,
235+ hipDeviceAttributeMemoryClockRate ,
236+ i );
237+ if (hipSuccess != hip_err ) {
238+ opal_output (0 , "Failed to query device MemoryClockRate, err=%d %s\n" ,
239+ hip_err , hipGetErrorString (hip_err ));
240+ err = OPAL_ERROR ;
241+ goto out ;
242+ }
243+
224244 int bus_width ; // bit
225- err = hipDeviceGetAttribute (& bus_width ,
226- hipDeviceAttributeMemoryBusWidth ,
227- i );
228- /* bw = clock_rate * bus width * 2bit multiplier
229- * See https://forums.developer.nvidia.com/t/memory-clock-rate/107940
230- */
245+ hip_err = hipDeviceGetAttribute (& bus_width ,
246+ hipDeviceAttributeMemoryBusWidth ,
247+ i );
248+ if (hipSuccess != hip_err ) {
249+ opal_output (0 , "Failed to query device MemoryBusWidth, err=%d %s\n" ,
250+ hip_err , hipGetErrorString (hip_err ));
251+ err = OPAL_ERROR ;
252+ goto out ;
253+ }
254+
255+ /* bw = clock_rate * bus width * 2bit multiplier */
231256 float bw = ((float )mem_clock_rate * (float )bus_width * 2.0 ) / 1024 / 1024 / 8 ;
232257 opal_accelerator_rocm_mem_bw [i ] = bw ;
233258 }
259+
260+ #if HIP_VERSION >= 60000000
261+ int dev_id ;
262+ hip_err = hipGetDevice (& dev_id );
263+ if (hipSuccess != hip_err ) {
264+ opal_output (0 , "error retrieving current device" );
265+ err = OPAL_ERROR ;
266+ goto out ;
267+ }
268+
269+ int has_large_bar = 0 ;
270+ hip_err = hipDeviceGetAttribute (& has_large_bar , hipDeviceAttributeIsLargeBar ,
271+ dev_id );
272+ if (hipSuccess != hip_err ) {
273+ opal_output (0 , "error retrieving current device" );
274+ err = OPAL_ERROR ;
275+ goto out ;
276+ }
277+
278+ if (0 == has_large_bar ) {
279+ // Without large BAR we have to use hipMemcpy(Async) for all data transfers
280+ opal_output (0 , "Large BAR support is not enabled on current device. "
281+ "Enable large BAR support in BIOS (Above 4G Encoding) for "
282+ "better performance\n." );
283+ opal_accelerator_rocm_memcpyH2D_limit = 0 ;
284+ opal_accelerator_rocm_memcpyD2H_limit = 0 ;
285+ }
286+ #endif
287+
234288 err = OPAL_SUCCESS ;
235289 opal_atomic_wmb ();
236290 accelerator_rocm_init_complete = true;
0 commit comments