Uses the tokenized query returned by python-sqlparse and generates query metadata.
Extracts column names and tables used by the query. Automatically resolves column aliases, sub-query aliases and table aliases.
Also provides a helper for normalization of SQL queries.
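For a quick taste of those capabilities, here is a minimal sketch; the query is hypothetical and the outputs reflect the alias resolution described above (the verified examples are in the usage section below):

```python
from sql_metadata import Parser

# hypothetical query, for illustration only
parser = Parser("SELECT p.name FROM products AS p")

parser.tables   # expected: ['products']
parser.columns  # expected: ['products.name'] -- the table alias `p` is resolved
```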
Supported query syntax:
- MySQL
- PostgreSQL
- SQLite
- MSSQL
- Apache Hive
(note that the listed backends can differ quite substantially, but they should work with regard to the query types supported by sql-metadata)
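As a small, hedged illustration of why the dialect differences rarely matter for parsing, identifier quoting styles are handled uniformly; the queries below are made up and the expected outputs follow from the quote-stripping behaviour shown in the usage examples further down:

```python
from sql_metadata import Parser

# MySQL-style backticks and ANSI/PostgreSQL-style double quotes
# are both accepted; quotes are stripped from the reported names
Parser("SELECT `id` FROM `users`").tables   # expected: ['users']
Parser('SELECT "id" FROM "users"').tables   # expected: ['users']
```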
You can test the capabilities of sql-metadata with an interactive demo: https://sql-app.infocruncher.com/
Install the package with pip:

```
pip install sql-metadata
```

Extracting raw sql-metadata tokens:

```python
from sql_metadata import Parser

# extract raw sql-metadata tokens
Parser("SELECT * FROM foo").tokens
# ['SELECT', '*', 'FROM', 'foo']
```

Extracting columns from a query:

```python
from sql_metadata import Parser

# get columns from query - for more examples see `tests/test_getting_columns.py`
Parser("SELECT test, id FROM foo, bar").columns
# ['test', 'id']

Parser("INSERT /* VoteHelper::addVote xxx */ INTO `page_vote` (article_id,user_id,`time`) VALUES ('442001','27574631','20180228130846')").columns
# ['article_id', 'user_id', 'time']

parser = Parser("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b ON a.ip_address = b.ip_address")

# note that aliases are auto-resolved
parser.columns
# ['product_a.*', 'product_a.users.ip_address', 'product_b.users.ip_address']

# note that you can also extract columns with their place in the query
# which will return a dict with lists divided into select, where, order_by, group_by, join, insert and update
parser.columns_dict
# {'select': ['product_a.users.*'], 'join': ['product_a.users.ip_address', 'product_b.users.ip_address']}
```

Extracting column aliases from a query:

```python
from sql_metadata import Parser

parser = Parser("SELECT a, (b + c - u) as alias1, custome_func(d) alias2 from aa, bb order by alias1")

# note that the columns list does not contain the aliases of the columns
parser.columns
# ["a", "b", "c", "u", "d"]

# but you can still extract the alias names
parser.columns_aliases_names
# ["alias1", "alias2"]

# aliases are resolved to the columns which they refer to
parser.columns_aliases
# {"alias1": ["b", "c", "u"], "alias2": "d"}

# you can also extract aliases by the section of the query in which they are used
parser.columns_aliases_dict
# {"order_by": ["alias1"], "select": ["alias1", "alias2"]}

# the same applies to aliases used in query sections when you extract columns_dict
# here only the alias is used in order by but it's resolved to the actual columns
assert parser.columns_dict == {'order_by': ['b', 'c', 'u'],
                               'select': ['a', 'b', 'c', 'u', 'd']}
```

Extracting tables from a query:

```python
from sql_metadata import Parser

# get tables from query - for more examples see `tests/test_getting_tables.py`
Parser("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b ON a.ip_address = b.ip_address").tables
# ['product_a.users', 'product_b.users']

Parser("SELECT test, id FROM foo, bar").tables
# ['foo', 'bar']

# you can also extract aliases of the tables as a dictionary
parser = Parser("SELECT f.test FROM foo AS f")

# get table aliases
parser.tables_aliases
# {'f': 'foo'}

# note that aliases are auto-resolved for columns
parser.columns
# ["foo.test"]
```

Extracting values from an INSERT query:

```python
from sql_metadata import Parser

parser = Parser(
    "INSERT /* VoteHelper::addVote xxx */ INTO `page_vote` (article_id,user_id,`time`) "
    "VALUES ('442001','27574631','20180228130846')"
)
# extract values from query
parser.values
# ["442001", "27574631", "20180228130846"]

# extract a dictionary with column-value pairs
parser.values_dict
# {"article_id": "442001", "user_id": "27574631", "time": "20180228130846"}

# if column names are not set, placeholders are auto-added
parser = Parser(
    "INSERT IGNORE INTO `table` VALUES (9, 2.15, '123', '2017-01-01');"
)
parser.values
# [9, 2.15, "123", "2017-01-01"]

parser.values_dict
# {"column_1": 9, "column_2": 2.15, "column_3": "123", "column_4": "2017-01-01"}
```

Extracting LIMIT and OFFSET:

```python
from sql_metadata import Parser

Parser('SELECT foo_limit FROM bar_offset LIMIT 50 OFFSET 1000').limit_and_offset
# (50, 1000)

Parser('SELECT foo_limit FROM bar_offset limit 2000,50').limit_and_offset
# (50, 2000)
```

Extracting WITH statements (common table expressions):

```python
from sql_metadata import Parser

parser = Parser(
    """
WITH
    database1.tableFromWith AS (SELECT aa.* FROM table3 as aa
                                left join table4 on aa.col1=table4.col2),
    test as (SELECT * from table3)
SELECT
  "xxxxx"
FROM
    database1.tableFromWith alias
LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx")
"""
)
# get names / aliases of WITH statements
parser.with_names
# ["database1.tableFromWith", "test"]

# get the definitions of the WITH queries
parser.with_queries
# {"database1.tableFromWith": "SELECT aa.* FROM table3 as aa left join table4 on aa.col1=table4.col2"
#  "test": "SELECT * from table3"}

# note that names of WITH statements do not appear in tables
parser.tables
# ["table3", "table4", "database2.table2"]
```

Extracting sub-queries:

```python
from sql_metadata import Parser

parser = Parser(
    """
SELECT COUNT(1) FROM
(SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1) a
JOIN (SELECT st.task_id FROM some_task st WHERE task_type_id = 80) b
ON a.task_id = b.task_id;
"""
)

# get the sub-queries dictionary
parser.subqueries
# {"a": "SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1",
#  "b": "SELECT st.task_id FROM some_task st WHERE task_type_id = 80"}

# get names / aliases of sub-queries / derived tables
parser.subqueries_names
# ["a", "b"]

# note that columns coming from sub-queries are resolved to real columns
parser.columns
# ["some_task_detail.task_id", "some_task_detail.STATUS", "some_task.task_id",
#  "task_type_id"]

# the same applies to columns_dict, note that the join columns are resolved
parser.columns_dict
# {'join': ['some_task_detail.task_id', 'some_task.task_id'],
#  'select': ['some_task_detail.task_id', 'some_task.task_id'],
#  'where': ['some_task_detail.STATUS', 'task_type_id']}
```

See the tests files for more examples of a bit more complex queries.
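To tie the pieces together, here is a hedged end-to-end sketch on a single hypothetical query; only properties shown above are used, and the outputs are what the documented behaviour suggests rather than verified results:

```python
from sql_metadata import Parser

# hypothetical reporting query, for illustration only
parser = Parser(
    "SELECT u.name, o.total FROM users AS u "
    "JOIN orders AS o ON o.user_id = u.id "
    "WHERE o.total > 100 ORDER BY o.total DESC LIMIT 10 OFFSET 20"
)

parser.tables            # expected: ['users', 'orders']
parser.tables_aliases    # expected: {'u': 'users', 'o': 'orders'}
parser.columns           # expected: ['users.name', 'orders.total', 'orders.user_id', 'users.id']
parser.limit_and_offset  # expected: (10, 20)
```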
Query normalization and comments extraction:

```python
from sql_metadata import Parser

parser = Parser('SELECT /* Test */ foo FROM bar WHERE id in (1, 2, 56)')

# generalize query
parser.generalize
# 'SELECT foo FROM bar WHERE id in (XYZ)'

# remove comments
parser.without_comments
# 'SELECT foo FROM bar WHERE id in (1, 2, 56)'

# extract comments
parser.comments
# ['/* Test */']
```

See the test/test_normalization.py file for more examples of a bit more complex queries.
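A common use of generalize is grouping similar queries for logging or performance reporting. A minimal sketch, assuming each multi-value IN list normalizes to (XYZ) as in the example above (the query list is made up):

```python
from collections import Counter

from sql_metadata import Parser

queries = [
    "SELECT foo FROM bar WHERE id in (1, 2, 56)",
    "SELECT foo FROM bar WHERE id in (3, 4)",
    "SELECT foo FROM bar WHERE id in (7, 8, 9)",
]

# count how many times each normalized query shape occurs
Counter(Parser(query).generalize for query in queries)
# expected: Counter({'SELECT foo FROM bar WHERE id in (XYZ)': 3})
```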
The `sql_metadata.compat` module has been implemented to make the introduction of sql-metadata v2.0 smoother.
You can use it by simply changing the imports in your code from:
```python
from sql_metadata import get_query_columns, get_query_tables
```

into:
```python
from sql_metadata.compat import get_query_columns, get_query_tables
```

The following functions from the old API are available in the `sql_metadata.compat` module:
- `generalize_sql`
- `get_query_columns` (since #131 columns aliases ARE NOT returned by this function)
- `get_query_limit_and_offset`
- `get_query_tables`
- `get_query_tokens`
- `preprocess_query`
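For example, the compat helpers can be called in place of the Parser properties shown earlier; the outputs below mirror the documented `.columns` and `.tables` examples:

```python
from sql_metadata.compat import get_query_columns, get_query_tables

get_query_tables("SELECT test, id FROM foo, bar")
# ['foo', 'bar']

get_query_columns("SELECT test, id FROM foo, bar")
# ['test', 'id']
```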
Created and maintained by @macbre with great contributions from @collerek and others.
- aborecki (https://github.com/aborecki)
- collerek (https://github.com/collerek)
- dylanhogg (https://github.com/dylanhogg)
- macbre (https://github.com/macbre)