Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/cudamatrix/cu-common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,25 @@ void GetBlockSizesForSimpleMatrixOperation(int32 num_rows,
"Matrix has too many rows to process");
dimGrid->z = 1;
}

const char* cublasGetStatusString(cublasStatus_t status)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for the PR.
So there wasn't an official version of this function, that we could have used?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that there wasn't an official function like this, but please use Google style w.r.t. open-braces: put them on the same line as the previous one, after a space, e.g.
const char* cublasGetStatusString(cublasStatus_t status) {

And please put a comment before the declaration of this function, saying:
// This is analogous to the CUDA function cudaGetErrorString().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reformat done.
Howerver, the CI raise some reasonless error :(

{
switch(status)
{
case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS";
case CUBLAS_STATUS_NOT_INITIALIZED: return "CUBLAS_STATUS_NOT_INITIALIZED";
case CUBLAS_STATUS_ALLOC_FAILED: return "CUBLAS_STATUS_ALLOC_FAILED";
case CUBLAS_STATUS_INVALID_VALUE: return "CUBLAS_STATUS_INVALID_VALUE";
case CUBLAS_STATUS_ARCH_MISMATCH: return "CUBLAS_STATUS_ARCH_MISMATCH";
case CUBLAS_STATUS_MAPPING_ERROR: return "CUBLAS_STATUS_MAPPING_ERROR";
case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED";
case CUBLAS_STATUS_INTERNAL_ERROR: return "CUBLAS_STATUS_INTERNAL_ERROR";
case CUBLAS_STATUS_NOT_SUPPORTED: return "CUBLAS_STATUS_NOT_SUPPORTED";
case CUBLAS_STATUS_LICENSE_ERROR: return "CUBLAS_STATUS_LICENSE_ERROR";
}
return "CUBLAS_STATUS_UNKNOWN_ERROR";
}

#endif

} // namespace
Expand Down
12 changes: 10 additions & 2 deletions src/cudamatrix/cu-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@
#include <cublas_v2.h>
#include <cuda_runtime_api.h>



#define CU_SAFE_CALL(fun) \
{ \
int32 ret; \
Expand All @@ -43,6 +41,14 @@
} \
}

#define CUBLAS_SAFE_CALL(fun) \
{ \
int32 ret; \
if ((ret = (fun)) != 0) { \
KALDI_ERR << "cublasStatus_t " << ret << " : \"" << cublasGetStatusString((cublasStatus_t)ret) << "\" returned from '" << #fun << "'"; \
} \
}

#define KALDI_CUDA_ERR(ret, msg) \
{ \
if (ret != 0) { \
Expand Down Expand Up @@ -76,6 +82,8 @@ void GetBlockSizesForSimpleMatrixOperation(int32 num_rows,
dim3 *dimBlock);


const char* cublasGetStatusString(cublasStatus_t status);

}

#endif // HAVE_CUDA
Expand Down
2 changes: 1 addition & 1 deletion src/cudamatrix/cu-device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ void CuDevice::FinalizeActiveGpu() {
// Remember the id of active GPU
active_gpu_id_ = act_gpu_id; // CuDevice::Enabled() is true from now on
// Initialize the CUBLAS
CU_SAFE_CALL(cublasCreate(&handle_));
CUBLAS_SAFE_CALL(cublasCreate(&handle_));

// Notify user which GPU is finally used
char name[128];
Expand Down
27 changes: 14 additions & 13 deletions src/cudamatrix/cu-matrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1161,7 +1161,7 @@ void CuMatrixBase<Real>::AddMatMat(
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
CuTimer tim;
CU_SAFE_CALL(cublas_gemm(GetCublasHandle(),
CUBLAS_SAFE_CALL(cublas_gemm(GetCublasHandle(),
(transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N),
(transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N),
m, n, k, alpha, B.data_, B.Stride(),
Expand All @@ -1188,8 +1188,8 @@ void CuMatrixBase<Real>::AddVecVec(
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
CuTimer tim;
CU_SAFE_CALL(cublas_ger(GetCublasHandle(), m, n, alpha,
y.Data(), 1, x.Data(), 1, data_, Stride()));
CUBLAS_SAFE_CALL(cublas_ger(GetCublasHandle(), m, n, alpha,
y.Data(), 1, x.Data(), 1, data_, Stride()));

CuDevice::Instantiate().AccuProfile(__func__, tim);
} else
Expand All @@ -1215,9 +1215,10 @@ void CuMatrixBase<Real>::SymAddMat2(
CuTimer tim;
cublasOperation_t trans = (transA == kTrans ? CUBLAS_OP_N : CUBLAS_OP_T);
MatrixIndexT A_other_dim = (transA == kNoTrans ? A.num_cols_ : A.num_rows_);
CU_SAFE_CALL(cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, trans,
num_rows_, A_other_dim, alpha, A.Data(), A.Stride(),
beta, this->data_, this->stride_));
CUBLAS_SAFE_CALL(cublas_syrk(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER,
trans, num_rows_, A_other_dim,
alpha, A.Data(), A.Stride(),
beta, this->data_, this->stride_));

CuDevice::Instantiate().AccuProfile(__func__, tim);
} else
Expand Down Expand Up @@ -2106,13 +2107,13 @@ void AddMatMatBatched(const Real alpha, std::vector<CuSubMatrix<Real>* > &C,

CU_SAFE_CALL(cudaMemcpy(device_abc_array, host_abc_array, 3*size*sizeof(Real*), cudaMemcpyHostToDevice));

CU_SAFE_CALL(cublas_gemmBatched(GetCublasHandle(),
(transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N),
(transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N),
m, n, k, alpha, device_b_array,
B[0]->Stride(), device_a_array,
A[0]->Stride(), beta, device_c_array,
C[0]->Stride(), size));
CUBLAS_SAFE_CALL(cublas_gemmBatched(GetCublasHandle(),
(transB==kTrans? CUBLAS_OP_T:CUBLAS_OP_N),
(transA==kTrans? CUBLAS_OP_T:CUBLAS_OP_N),
m, n, k, alpha, device_b_array,
B[0]->Stride(), device_a_array,
A[0]->Stride(), beta, device_c_array,
C[0]->Stride(), size));

CuDevice::Instantiate().Free(device_abc_array);
delete[] host_abc_array;
Expand Down
2 changes: 1 addition & 1 deletion src/cudamatrix/cu-packed-matrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ void CuPackedMatrix<Real>::Scale(Real alpha) {
CuTimer tim;
size_t nr = static_cast<size_t>(num_rows_),
num_elements = ((nr * (nr+1)) / 2);
CU_SAFE_CALL(cublas_scal(GetCublasHandle(), num_elements, alpha, data_, 1));
CUBLAS_SAFE_CALL(cublas_scal(GetCublasHandle(), num_elements, alpha, data_, 1));

CuDevice::Instantiate().AccuProfile("CuPackedMatrix::Scale", tim);
} else
Expand Down
4 changes: 2 additions & 2 deletions src/cudamatrix/cu-sp-matrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ void CuSpMatrix<Real>::AddVec2(const Real alpha, const CuVectorBase<Real> &v) {
dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
dim3 dimGrid(n_blocks(nr, CU2DBLOCK), n_blocks(nr, CU2DBLOCK));

CU_SAFE_CALL(cublas_spr(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, this->num_rows_, alpha, v.Data(),
1, this->Data()));
CUBLAS_SAFE_CALL(cublas_spr(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, this->num_rows_, alpha, v.Data(),
1, this->Data()));

CuDevice::Instantiate().AccuProfile("CuSpMatrix::AddVec2", tim);
} else
Expand Down
2 changes: 1 addition & 1 deletion src/cudamatrix/cu-tp-matrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ void CuTpMatrix<Real>::Invert() {
CU_SAFE_CALL(cudaGetLastError());
CuMatrix<Real> tmp2(dim, dim);
tmp2.CopyFromTp(*this);
CU_SAFE_CALL(cublas_trsm(GetCublasHandle(), dim, dim, alpha, tmp2.Data(), tmp2.Dim().stride,
CUBLAS_SAFE_CALL(cublas_trsm(GetCublasHandle(), dim, dim, alpha, tmp2.Data(), tmp2.Dim().stride,
tmp.Data(), tmp.Dim().stride));
this->CopyFromMat(tmp, kNoTrans);
CuDevice::Instantiate().AccuProfile(__func__, tim);
Expand Down
24 changes: 12 additions & 12 deletions src/cudamatrix/cu-vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ Real VecVec(const CuVectorBase<Real> &a,
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
CuTimer tim;
CU_SAFE_CALL(cublas_dot(GetCublasHandle(), a.Dim(), a.Data(), 1, b.Data(),
1, &result));
CUBLAS_SAFE_CALL(cublas_dot(GetCublasHandle(), a.Dim(), a.Data(), 1, b.Data(),
1, &result));
CuDevice::Instantiate().AccuProfile(__func__, tim);
} else
#endif
Expand Down Expand Up @@ -451,10 +451,10 @@ void CuVectorBase<Real>::AddMatVec(const Real alpha,

// Everything is backwards in CuBlas. We need to reverse rows, columns,
// transpose-ness.
CU_SAFE_CALL(cublas_gemv(GetCublasHandle(),
(trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T),
M.NumCols(), M.NumRows(), alpha, M.Data(),
M.Stride(), v.Data(), 1, beta, data_, 1));
CUBLAS_SAFE_CALL(cublas_gemv(GetCublasHandle(),
(trans==kTrans? CUBLAS_OP_N:CUBLAS_OP_T),
M.NumCols(), M.NumRows(), alpha, M.Data(),
M.Stride(), v.Data(), 1, beta, data_, 1));

CuDevice::Instantiate().AccuProfile(__func__, tim);
} else
Expand All @@ -478,8 +478,8 @@ void CuVectorBase<Real>::AddSpVec(const Real alpha,

// Note: in our opinion the CuSpMatrix represents a lower-triangular matrix, but
// in CUBLAS, for some stupid reason, everything is reversed.
CU_SAFE_CALL(cublas_spmv(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, Dim(),
alpha, M.Data(), v.Data(), 1, beta, data_, 1));
CUBLAS_SAFE_CALL(cublas_spmv(GetCublasHandle(), CUBLAS_FILL_MODE_UPPER, Dim(),
alpha, M.Data(), v.Data(), 1, beta, data_, 1));

CuDevice::Instantiate().AccuProfile(__func__, tim);
} else
Expand Down Expand Up @@ -775,7 +775,7 @@ void CuVectorBase<double>::CopyFromVec(const CuVectorBase<float> &src) {
if (CuDevice::Instantiate().Enabled()) {
if (dim_ == 0) return;
CuTimer tim;
CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1));
CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1));
CuDevice::Instantiate().AccuProfile(__func__, tim);
} else
#endif
Expand All @@ -792,7 +792,7 @@ void CuVectorBase<float>::CopyFromVec(const CuVectorBase<double> &src) {
if (CuDevice::Instantiate().Enabled()) {
if (dim_ == 0) return;
CuTimer tim;
CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1));
CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, src.Data(), 1, data_, 1));
CuDevice::Instantiate().AccuProfile(__func__, tim);
} else
#endif
Expand Down Expand Up @@ -1089,8 +1089,8 @@ void CuVectorBase<Real>::CopyDiagFromMat(const CuMatrix<Real> &M) {
if (CuDevice::Instantiate().Enabled()) {
KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols()));
CuTimer tim;
CU_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, M.Data(), M.Stride() + 1,
data_, 1));
CUBLAS_SAFE_CALL(cublas_copy(GetCublasHandle(), dim_, M.Data(), M.Stride() + 1,
data_, 1));

CuDevice::Instantiate().AccuProfile(__func__, tim);
} else
Expand Down