2 changes: 2 additions & 0 deletions paddle/common/layout.h
@@ -85,6 +85,8 @@ inline DataLayout StringToDataLayout(const std::string& str) {
return DataLayout::kAnyLayout;
} else if (s == "MKLDNNLAYOUT") {
return DataLayout::kMKLDNN;
} else if (s == "ONEDNNLAYOUT") {
return DataLayout::ONEDNN;
} else if (s == "SPARSE_COO") {
return DataLayout::SPARSE_COO;
} else if (s == "SPARSE_CSR") {
2 changes: 1 addition & 1 deletion paddle/fluid/framework/op_registry.h
@@ -170,7 +170,7 @@ inline void RegisterKernelClass(const char* op_type,
std::string library(library_type);
std::string data_layout = "ANYLAYOUT";
if (library == "MKLDNN") {
data_layout = "MKLDNNLAYOUT";
data_layout = "ONEDNNLAYOUT";
}
#ifdef PADDLE_WITH_CUSTOM_DEVICE
if (std::is_same<PlaceType, phi::CustomPlace>::value) {
2 changes: 2 additions & 0 deletions python/paddle/static/quantization/__init__.py
@@ -19,9 +19,11 @@
)
from .quant2_int8_onednn_pass import ( # noqa: F401
Quant2Int8MkldnnPass,
Quant2Int8OnednnPass,
)
from .quant_int8_onednn_pass import ( # noqa: F401
QuantInt8MkldnnPass,
QuantInt8OnednnPass,
)
from .quanter import ( # noqa: F401
convert,
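Both the renamed passes and their legacy names are now exported from paddle.static.quantization. A minimal import sketch (an illustration, not part of the change itself), assuming a build that contains this patch; the legacy names are kept only as deprecated aliases of the new classes:

from paddle.static.quantization import (
    Quant2Int8OnednnPass,
    Quant2Int8MkldnnPass,  # deprecated alias, subclass of Quant2Int8OnednnPass
    QuantInt8OnednnPass,
    QuantInt8MkldnnPass,   # deprecated alias, subclass of QuantInt8OnednnPass
)

# The aliases are plain subclasses, so existing isinstance/issubclass checks
# keep working after migrating to the new names.
assert issubclass(Quant2Int8MkldnnPass, Quant2Int8OnednnPass)
assert issubclass(QuantInt8MkldnnPass, QuantInt8OnednnPass)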
15 changes: 14 additions & 1 deletion python/paddle/static/quantization/quant2_int8_onednn_pass.py
@@ -14,13 +14,15 @@

import numpy as np

from paddle.utils import deprecated

from ...base.framework import IrGraph
from ...framework import _get_paddle_place, core

OpRole = core.op_proto_and_checker_maker.OpRole


class Quant2Int8MkldnnPass:
class Quant2Int8OnednnPass:
"""
Transform a quant model IrGraph into MKL-DNN supported INT8 IrGraph.
The pass consists of the following transformations:
@@ -721,3 +723,14 @@ def _quantize_fp32_graph(self, graph):
graph = self._apply_pass(graph, 'int8_scale_calculation_onednn_pass')
graph = self._apply_pass(graph, 'params_quantization_onednn_pass')
return graph


class Quant2Int8MkldnnPass(Quant2Int8OnednnPass):
@deprecated(
since="3.1.0",
update_to="paddle.static.quantization.Quant2Int8OnednnPass",
level=1,
reason="Quant2Int8MkldnnPass will be removed in future",
)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
21 changes: 17 additions & 4 deletions python/paddle/static/quantization/quant_int8_onednn_pass.py
@@ -14,11 +14,13 @@

import numpy as np

from paddle.utils import deprecated

from ...base.framework import IrGraph
from ...framework import _get_paddle_place


class QuantInt8MkldnnPass:
class QuantInt8OnednnPass:
"""
Convert QuantizationFreezePass generated IrGraph to MKL-DNN supported INT8
IrGraph. The following transformations are done in this pass:
@@ -48,13 +50,13 @@ def __init__(self, _scope=None, _place=None):
>>> # The original graph will be rewritten.
>>> import paddle
>>> from paddle import static
>>> from paddle.static.quantization import QuantInt8MkldnnPass
>>> from paddle.static.quantization import QuantInt8OnednnPass
>>> from paddle.framework import IrGraph
>>> from paddle.framework import core

>>> graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
>>> place = paddle.CPUPlace()
>>> onednn_pass = QuantInt8MkldnnPass(static.global_scope(), place)
>>> onednn_pass = QuantInt8OnednnPass(static.global_scope(), place)
>>> onednn_pass.apply(graph)
"""

@@ -245,7 +247,7 @@ def _transform_to_quantize_onednn(self, graph, op_node):
quant_op_node = graph.create_op_node(
op_type='quantize',
attrs={
'data_format': 'MKLDNNLAYOUT',
'data_format': 'ONEDNNLAYOUT',
'use_mkldnn': 1,
'Scale': scale_in,
'is_negative_input': 1,
@@ -287,3 +289,14 @@ def _remove_unused_var_nodes(self, graph):
)
)
graph.safe_remove_nodes(all_unused_vars)


class QuantInt8MkldnnPass(QuantInt8OnednnPass):
@deprecated(
since="3.1.0",
update_to="paddle.static.quantization.QuantInt8OnednnPass",
level=1,
reason="QuantInt8MkldnnPass will be removed in future",
)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
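A short sketch of how the deprecated alias above is expected to behave at construction time; this is an illustration only and assumes that paddle.utils.deprecated with level=1 emits a DeprecationWarning at call time rather than raising:

import warnings

import paddle
from paddle import static
from paddle.static.quantization import QuantInt8MkldnnPass, QuantInt8OnednnPass

place = paddle.CPUPlace()
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Constructing the legacy name forwards to QuantInt8OnednnPass and should
    # surface the deprecation notice (assumption: level=1 warns, not raises).
    legacy_pass = QuantInt8MkldnnPass(static.global_scope(), place)

assert isinstance(legacy_pass, QuantInt8OnednnPass)
assert caught, "expected a deprecation message pointing to QuantInt8OnednnPass"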
2 changes: 1 addition & 1 deletion test/cpp/inference/api/config_printer.h
@@ -75,7 +75,7 @@ std::ostream &operator<<(std::ostream &os, const AnalysisConfig &config) {
<< "cpu_num_threads: " << config.cpu_math_library_num_threads() << "\n";
os << GenSpaces(num_spaces)
<< "use_tensorrt: " << config.tensorrt_engine_enabled() << "\n";
os << GenSpaces(num_spaces) << "use_mkldnn: " << config.mkldnn_enabled()
os << GenSpaces(num_spaces) << "use_onednn: " << config.onednn_enabled()
<< "\n";
num_spaces--;
os << GenSpaces(num_spaces) << "}\n";
10 changes: 5 additions & 5 deletions test/deprecated/cpp/inference/api/analysis_predictor_tester.cc
@@ -335,15 +335,15 @@ TEST(AnalysisPredictor, bf16_pass_strategy) {
passStrategy.EnableMkldnnBfloat16();
}

TEST(AnalysisPredictor, mkldnn_fc_pass_strategy) {
TEST(AnalysisPredictor, onednn_fc_pass_strategy) {
std::vector<std::string> passes;
PassStrategy passStrategy(passes);
passStrategy.DisableOnednnFcPasses();
ASSERT_EQ(passes.size(), (size_t)0);
}

#ifdef PADDLE_WITH_DNNL
TEST(AnalysisPredictor, mkldnn_fc_passes_cpu_pass_strategy) {
TEST(AnalysisPredictor, onednn_fc_passes_cpu_pass_strategy) {
CpuPassStrategy cpuPassStrategy;
cpuPassStrategy.EnableONEDNN();
const std::vector<std::string> fc_passes_to_erase(
@@ -359,15 +359,15 @@ TEST(AnalysisPredictor, mkldnn_fc_passes_cpu_pass_strategy) {
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST(AnalysisPredictor, mkldnn_fc_passes_gpu_pass_strategy) {
TEST(AnalysisPredictor, onednn_fc_passes_gpu_pass_strategy) {
AnalysisConfig config;
config.EnableUseGpu(100, 0);
config.EnableONEDNN();
config.DisableOnednnFcPasses();
#ifdef PADDLE_WITH_DNNL
ASSERT_TRUE(config.mkldnn_fc_passes_disabled());
ASSERT_TRUE(config.onednn_fc_passes_disabled());
#else
ASSERT_FALSE(config.mkldnn_fc_passes_disabled());
ASSERT_FALSE(config.onednn_fc_passes_disabled());
#endif
}
#endif
@@ -22,13 +22,13 @@ namespace inference {

using paddle::PaddleTensor;

void profile(bool use_mkldnn = false, bool use_bfloat16 = false);
void profile(bool use_onednn = false, bool use_bfloat16 = false);
std::vector<std::vector<paddle::PaddleTensor>> LoadInputData();
void CompareNativeAndAnalysisWrapper(bool use_mkldnn = false);
void CompareNativeAndAnalysisWrapper(bool use_onednn = false);
std::vector<paddle::PaddleTensor> ParseInputStreamToVector(
const std::string &line);

AnalysisConfig SetConfig(bool use_mkldnn = false, bool use_bfloat16 = false);
AnalysisConfig SetConfig(bool use_onednn = false, bool use_bfloat16 = false);

template <typename T>
paddle::PaddleTensor ParseTensor(const std::string &field);
@@ -50,15 +50,15 @@ TEST(Analyzer_bert, profile) {
}

#ifdef PADDLE_WITH_DNNL
TEST(Analyzer_bert, profile_mkldnn) {
auto use_mkldnn = true;
profile(use_mkldnn);
TEST(Analyzer_bert, profile_onednn) {
auto use_onednn = true;
profile(use_onednn);
}

TEST(Analyzer_bert, profile_mkldnn_bf16) {
auto use_mkldnn = true;
TEST(Analyzer_bert, profile_onednn_bf16) {
auto use_onednn = true;
auto use_bfloat16 = true;
profile(use_mkldnn, use_bfloat16);
profile(use_onednn, use_bfloat16);
}
#endif

@@ -70,8 +70,8 @@ TEST(Analyzer_bert, compare) {
}
#ifdef PADDLE_WITH_DNNL
TEST(Analyzer_bert, compare_mkldnn) {
auto use_mkldnn = true;
CompareNativeAndAnalysisWrapper(use_mkldnn);
auto use_onednn = true;
CompareNativeAndAnalysisWrapper(use_onednn);
}
#endif

@@ -135,8 +135,8 @@ TEST(Analyzer_bert, transfer_scope_cache) {
"The size of data cache is not equal to thread number."));
}

void profile(bool use_mkldnn, bool use_bfloat16) {
auto config(SetConfig(use_mkldnn, use_bfloat16));
void profile(bool use_onednn, bool use_bfloat16) {
auto config(SetConfig(use_onednn, use_bfloat16));
std::vector<std::vector<PaddleTensor>> outputs;
auto inputs = LoadInputData();
TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&config),
@@ -168,8 +168,8 @@ std::vector<std::vector<paddle::PaddleTensor>> LoadInputData() {
return inputs;
}

void CompareNativeAndAnalysisWrapper(bool use_mkldnn) {
auto cfg(SetConfig(use_mkldnn));
void CompareNativeAndAnalysisWrapper(bool use_onednn) {
auto cfg(SetConfig(use_onednn));
auto inputs = LoadInputData();
CompareNativeAndAnalysis(
reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
@@ -201,12 +201,12 @@ std::vector<paddle::PaddleTensor> ParseInputStreamToVector(
return tensors;
}

AnalysisConfig SetConfig(bool use_mkldnn, bool use_bfloat16) {
AnalysisConfig SetConfig(bool use_onednn, bool use_bfloat16) {
AnalysisConfig config;
config.SetModel(FLAGS_infer_model);
config.DisableFCPadding();

if (use_mkldnn) {
if (use_onednn) {
config.EnableONEDNN();
}

14 changes: 7 additions & 7 deletions test/deprecated/ir/inference/auto_scan_test.py
@@ -239,7 +239,7 @@ def create_inference_config(
if use_gpu:
config.enable_use_gpu(100, 0)
if not use_mkldnn:
config.disable_mkldnn()
config.disable_onednn()
if use_xpu:
config.enable_xpu()
if passes is not None:
@@ -248,7 +248,7 @@
return config


class MkldnnAutoScanTest(AutoScanTest):
class OnednnAutoScanTest(AutoScanTest):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

@@ -336,14 +336,14 @@ def run_test(self, quant=False, *args, **kwargs):

def inference_config_str(self, config) -> str:
dic = {}
enable_mkldnn = config.mkldnn_enabled()
dic["use_mkldnn"] = enable_mkldnn
enable_onednn = config.onednn_enabled()
dic["use_mkldnn"] = enable_onednn
enable_gpu = config.use_gpu()
dic["use_gpu"] = enable_gpu
return str(dic)


class PirMkldnnAutoScanTest(MkldnnAutoScanTest):
class PirOnednnAutoScanTest(OnednnAutoScanTest):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

@@ -572,8 +572,8 @@ def run_test(self, quant=False, prog_configs=None):

def inference_config_str(self, config) -> str:
dic = {}
enable_mkldnn = config.mkldnn_enabled()
dic["use_mkldnn"] = enable_mkldnn
enable_onednn = config.onednn_enabled()
dic["use_mkldnn"] = enable_onednn
enable_gpu = config.use_gpu()
dic['use_gpu'] = enable_gpu
enable_xpu = config.use_xpu()
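The auto-scan helpers above now key off the renamed oneDNN switches. A minimal sketch of the same queries outside the test harness, assuming paddle.inference.Config exposes the onednn_enabled()/enable_onednn()/disable_onednn() methods used in these tests:

from paddle.inference import Config

# Model-less config; enough to exercise the switches queried by
# inference_config_str() above.
config = Config()
config.disable_gpu()
config.enable_onednn()  # takes effect only on builds compiled with oneDNN

# Note the dict key keeps the legacy "use_mkldnn" name even though the
# query method itself was renamed.
print({"use_mkldnn": config.onednn_enabled(), "use_gpu": config.use_gpu()})

config.disable_onednn()
assert not config.onednn_enabled()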
16 changes: 8 additions & 8 deletions test/deprecated/ir/inference/inference_pass_test.py
@@ -38,7 +38,7 @@ def __init__(self, methodName='runTest'):
self.fetch_list = None

self.enable_mkldnn = False
self.enable_mkldnn_bfloat16 = False
self.enable_onednn_bfloat16 = False
self.enable_trt = False
self.enable_tensorrt_varseqlen = False
self.trt_parameters = None
@@ -143,7 +143,7 @@ def _get_analysis_config(
self.path + ".pdmodel", self.path + ".pdiparams"
)
config.disable_gpu()
config.disable_mkldnn()
config.disable_onednn()
config.switch_specify_input_names(True)
config.switch_ir_optim(True)
config.switch_use_feed_fetch_ops(False)
@@ -179,7 +179,7 @@

elif use_mkldnn:
config.enable_onednn()
if self.enable_mkldnn_bfloat16:
if self.enable_onednn_bfloat16:
config.enable_onednn_bfloat16()
return config

@@ -285,23 +285,23 @@ def check_output_with_option(

# Check whether the onednn results and the CPU results are the same.
if (not use_gpu) and self.enable_mkldnn:
mkldnn_outputs = self._get_inference_outs(
onednn_outputs = self._get_inference_outs(
self._get_analysis_config(
use_gpu=use_gpu, use_mkldnn=self.enable_mkldnn
)
)

self.assertTrue(
len(paddle_outs) == len(mkldnn_outputs),
len(paddle_outs) == len(onednn_outputs),
"The number of outputs is different between CPU and MKLDNN. ",
)

if self.enable_mkldnn_bfloat16:
if self.enable_onednn_bfloat16:
atol = 0.01
for paddle_out, mkldnn_output in zip(paddle_outs, mkldnn_outputs):
for paddle_out, onednn_output in zip(paddle_outs, onednn_outputs):
np.testing.assert_allclose(
np.array(paddle_out),
mkldnn_output,
onednn_output,
rtol=1e-05,
atol=atol,
err_msg='Output has diff between CPU and MKLDNN. ',