Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions application/nlq/data_access/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class RelationDatabase():
'redshift': 'redshift+psycopg2',
'starrocks': 'starrocks',
'clickhouse': 'clickhouse',
'hive': 'hive'
# Add more mappings here for other databases
}

Expand Down
1 change: 1 addition & 0 deletions application/pages/2_🪙_Data_Connection_Management.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
'redshift': 'Redshift',
'starrocks': 'StarRocks',
'clickhouse': 'Clickhouse',
'hive': 'Hive'
}


Expand Down
6 changes: 5 additions & 1 deletion application/requirements-api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,8 @@ starrocks==1.0.6
clickhouse-sqlalchemy==0.2.6
sagemaker
python-jose
sqlalchemy-redshift~=0.8.14
sqlalchemy-redshift~=0.8.14
numpy==1.26.4
pyhive==0.7.0
thrift==0.20.0
thrift-sasl==0.4.3
6 changes: 5 additions & 1 deletion application/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,8 @@ starrocks==1.0.6
clickhouse-sqlalchemy==0.2.6
sagemaker
fastapi~=0.110.1
sqlalchemy-redshift~=0.8.14
sqlalchemy-redshift~=0.8.14
numpy==1.26.4
pyhive==0.7.0
thrift==0.20.0
thrift-sasl==0.4.3
12 changes: 12 additions & 0 deletions application/utils/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,18 @@
| TRUNC | DATE |
</data_time_function_list>""".format(top_k=TOP_K)

# Prompt preamble for generating Hive SQL. The {top_k} placeholder is filled
# from TOP_K by the .format() call when this assignment is evaluated.
# Item 4 deliberately distinguishes comments from hints: in Hive, /*+ ... */
# is query hint syntax, so it must not be presented to the model as a comment.
HIVE_DIALECT_PROMPT_CLAUDE3 = """You are a data analysis expert and proficient in Hive SQL. Given an input question, first create a syntactically correct Hive SQL query to run.
Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per Hive SQL.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. In Hive, column names are typically not wrapped in quotes, so use them as-is.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use CURRENT_DATE function to get the current date, if the question involves "today".
Note that Hive has some differences from traditional SQL:
1. Use backticks (`) instead of double quotes for table or column names if they contain spaces or are reserved keywords.
2. Some functions may have different names or syntax, e.g., use concat() instead of ||.
3. Hive is case-insensitive for keywords and function names.
4. If a comment is needed, use the single-line form (--). Note that /*+ ... */ is query hint syntax in Hive, not a comment, so do not use it for comments.
Aside from giving the SQL answer, concisely explain yourself after giving the answer in the same language as the question.""".format(top_k=TOP_K)

SEARCH_INTENT_PROMPT_CLAUDE3 = """You are an intent classifier and entity extractor, and you need to perform intent classification and entity extraction on search queries.
Background: I want to query data in the database, and you need to help me determine the user's relevant intent and extract the keywords from the query statement. Finally, return a JSON structure.

Expand Down
5 changes: 4 additions & 1 deletion application/utils/prompts/generate_prompt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from utils.prompt import POSTGRES_DIALECT_PROMPT_CLAUDE3, MYSQL_DIALECT_PROMPT_CLAUDE3, \
DEFAULT_DIALECT_PROMPT, AGENT_COT_EXAMPLE, AWS_REDSHIFT_DIALECT_PROMPT_CLAUDE3, STARROCKS_DIALECT_PROMPT_CLAUDE3, CLICKHOUSE_DIALECT_PROMPT_CLAUDE3
DEFAULT_DIALECT_PROMPT, AGENT_COT_EXAMPLE, AWS_REDSHIFT_DIALECT_PROMPT_CLAUDE3, STARROCKS_DIALECT_PROMPT_CLAUDE3, \
CLICKHOUSE_DIALECT_PROMPT_CLAUDE3, HIVE_DIALECT_PROMPT_CLAUDE3
from utils.prompts import guidance_prompt
from utils.prompts import table_prompt
import logging
Expand Down Expand Up @@ -2206,6 +2207,8 @@ def generate_llm_prompt(ddl, hints, prompt_map, search_box, sql_examples=None, n
dialect_prompt = STARROCKS_DIALECT_PROMPT_CLAUDE3
elif dialect == 'clickhouse':
dialect_prompt = CLICKHOUSE_DIALECT_PROMPT_CLAUDE3
elif dialect == 'hive':
dialect_prompt = HIVE_DIALECT_PROMPT_CLAUDE3
else:
dialect_prompt = DEFAULT_DIALECT_PROMPT

Expand Down
7 changes: 5 additions & 2 deletions application/utils/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import time
import random
from datetime import datetime
import datetime

import pandas as pd

Expand All @@ -28,7 +28,7 @@ def generate_log_id():


def get_current_time():
    """Return the current local time as a 'YYYY-MM-DD HH:MM:SS' string.

    Returns:
        str: The naive local timestamp from ``datetime.datetime.now()``,
        formatted with second precision.
    """
    # `datetime` is bound to the module here (the file does `import datetime`),
    # so the class has to be reached as `datetime.datetime`. The stale bare
    # `datetime.now()` call that preceded this line would raise AttributeError
    # on the module and has been dropped.
    now = datetime.datetime.now()
    formatted_time = now.strftime('%Y-%m-%d %H:%M:%S')
    return formatted_time

Expand Down Expand Up @@ -60,6 +60,9 @@ def convert_timestamps_to_str(data):
if isinstance(item, pd.Timestamp):
# Convert Timestamp to string
new_row.append(item.strftime('%Y-%m-%d %H:%M:%S'))
elif isinstance(item, datetime.date):
# Convert datetime.date to string
new_row.append(item.strftime('%Y-%m-%d %H:%M:%S'))
else:
new_row.append(item)
converted_data.append(new_row)
Expand Down