CreateDataQualityRule
Define and enforce data quality rules for tables in Alibaba Cloud DataWorks, ensuring accuracy through customizable thresholds, sampling, and error handling configurations.
Instructions
创建质量规则。Note: this tool has an MCP Resource; please request CreateDataQualityRule (MCP Resource) to get more examples of how to use this tool.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| CheckingConfig | No | 样本校验设置 | |
| Description | No | 规则描述信息,最长500个字符 | |
| Enabled | No | 质量规则是否启用 | |
| ErrorHandlers | No | 质量规则校验问题处理器列表 | |
| Name | Yes | 规则名称 | |
| ProjectId | No | DataWorks工作空间ID | |
| SamplingConfig | No | 样本采集所需的设置 | |
| Severity | No | 规则对于业务的等级(对应页面上的强弱规则),可选的枚举值:Normal、High | |
| Target | Yes | 规则所监控的对象 | |
| TemplateCode | Yes | 规则所引用的规则模板唯一标识 | |
Input Schema (JSON Schema)
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "additionalProperties": false,
  "properties": {
    "CheckingConfig": {
      "additionalProperties": false,
      "description": "样本校验设置",
      "properties": {
        "ReferencedSamplesFilter": {
          "description": "有些类型的阈值需要查询出一些参考样本,然后对参考样本的值进行汇总得出进行比较的阈值,这里使用一个表达式来表示参考样本的查询方式",
          "type": "string"
        },
        "Thresholds": {
          "additionalProperties": false,
          "description": "阈值设置",
          "properties": {
            "Critical": {
              "additionalProperties": false,
              "description": "严重警告的阈值设置",
              "properties": {
                "Expression": {
                  "description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:- 波动上升大于0.01: $checkValue > 0.01 - 波动下降大于0.01:$checkValue < -0.01 - 波动率绝对值:abs($checkValue) > 0.01固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
                  "type": "string"
                },
                "Operator": {
                  "description": "比较符:- >- >=- <- <=- !=- =",
                  "type": "string"
                },
                "Value": {
                  "description": "阈值数值",
                  "type": "string"
                }
              },
              "type": "object"
            },
            "Expected": {
              "additionalProperties": false,
              "description": "期望的阈值设置",
              "properties": {
                "Expression": {
                  "description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:- 波动上升大于0.01: $checkValue > 0.01 - 波动下降大于0.01:$checkValue < -0.01 - 波动率绝对值:abs($checkValue) > 0.01固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
                  "type": "string"
                },
                "Operator": {
                  "description": "比较符:- >- >=- <- <=- !=- =",
                  "type": "string"
                },
                "Value": {
                  "description": "阈值数值",
                  "type": "string"
                }
              },
              "type": "object"
            },
            "Warned": {
              "additionalProperties": false,
              "description": "普通警告的阈值设置",
              "properties": {
                "Expression": {
                  "description": "阈值表达式。波动率类型规则必须使用表达式方式表示波动阈值。如:- 波动上升大于0.01: $checkValue > 0.01 - 波动下降大于0.01:$checkValue < -0.01 - 波动率绝对值:abs($checkValue) > 0.01固定值类型规则也可以使用表达式方式配置阈值,如果同时配置,表达式优先级高于Operator和Value",
                  "type": "string"
                },
                "Operator": {
                  "description": "比较符:- >- >=- <- <=- !=- =",
                  "type": "string"
                },
                "Value": {
                  "description": "阈值数值",
                  "type": "string"
                }
              },
              "type": "object"
            }
          },
          "type": "object"
        },
        "Type": {
          "description": "阈值计算方式,使用模版时可不设置。-Fixed-Fluctation-FluctationDiscreate-Auto-Average-Variance",
          "type": "string"
        }
      },
      "type": "object"
    },
    "Description": {
      "description": "规则描述信息,最长500个字符",
      "type": "string"
    },
    "Enabled": {
      "description": "质量规则是否启用",
      "type": "boolean"
    },
    "ErrorHandlers": {
      "description": "质量规则校验问题处理器列表",
      "items": {
        "additionalProperties": false,
        "properties": {
          "ErrorDataFilter": {
            "description": "如果是自定义SQL规则,需要用户指定SQL来过滤问题数据",
            "type": "string"
          },
          "Type": {
            "description": "处理器类型:- SaveErrorData",
            "type": "string"
          }
        },
        "type": "object"
      },
      "type": "array"
    },
    "Name": {
      "description": "规则名称",
      "type": "string"
    },
    "ProjectId": {
      "description": "DataWorks工作空间ID",
      "type": [
        "integer",
        "string"
      ]
    },
    "SamplingConfig": {
      "additionalProperties": false,
      "description": "样本采集所需的设置",
      "properties": {
        "Metric": {
          "description": "采样的指标名称,使用模版时可不设置。- Count:表行数- Min:字段最小值- Max:字段最大值- Avg:字段均值- DistinctCount:字段唯一值个数- DistinctPercent:字段唯一值个数与数据行数占比- DuplicatedCount:字段重复值个数- DuplicatedPercent:字段重复值个数与数据行数占比- TableSize:表大小- NullValueCount:字段为空的行数- NullValuePercent:字段为空的比例- GroupCount:按字段值聚合后每个值与对应的数据行数- CountNotIn:枚举值不匹配行数- CountDistinctNotIn:枚举值不匹配唯一值个数- UserDefinedSql:通过自定义SQL做样本采集",
          "type": "string"
        },
        "MetricParameters": {
          "description": "样本采集时,所需的参数",
          "type": "string"
        },
        "SamplingFilter": {
          "description": "采样时,对不关注的数据进行二次过滤的条件,最多16777215个字符",
          "type": "string"
        },
        "SettingConfig": {
          "description": "具体执行采样语句前,插入执行的一些运行时参数设置语句,最长1000个字符。目前只支持MaxCompute",
          "type": "string"
        }
      },
      "type": "object"
    },
    "Severity": {
      "description": "规则对于业务的等级(对应页面上的强弱规则),可选的枚举值:- Normal- High",
      "type": "string"
    },
    "Target": {
      "additionalProperties": false,
      "description": "规则所监控的对象",
      "properties": {
        "DatabaseType": {
          "description": "表类型的数据集,表所属的数据库类型。-maxcompute-emr-cdh-hologres-analyticdb_for_postgresql-analyticdb_for_mysql-starrocks",
          "type": "string"
        },
        "PartitionSpec": {
          "description": "分区表的分区设置",
          "type": "string"
        },
        "TableGuid": {
          "description": "规则所作用的表在数据地图中的唯一ID",
          "type": "string"
        },
        "Type": {
          "description": "监控对象类型。-Table",
          "type": "string"
        }
      },
      "required": [
        "DatabaseType",
        "TableGuid"
      ],
      "type": "object"
    },
    "TemplateCode": {
      "description": "规则所引用的规则模板唯一标识",
      "type": "string"
    }
  },
  "required": [
    "Name",
    "Target",
    "TemplateCode"
  ],
  "type": "object"
}