我有一段 PL/SQL 代码,
我想要一段 Python 代码,帮我从中找出源表、连接表、子查询表以及子查询连接表。请注意,PL/SQL 代码会随需求而变化。
期望的输出如下:
{'source_table': ['schema_name.table1 (rfslt)'],
'join_tables': ['schema_name.table2 (b)', 'table3 (doogal)', 'tab_time (d)'],
'subquery_table': ['schema_name.table6 (e)'],
'subquery_join_table': ['schema_name.table7 (h)']}
{'source_tables': ['table1 (a)'],
'join_tables': ['table2 (b)', 'table3 (c)', 'table4 (d)']}
{'source_tables': ['table1 (a)'],
'join_tables': ['table2 (b)', 'table3 (c)']}
下面是 plsql 代码,我需要从中找到上面提到的输出。
Insert into tabs
Select * from (Select * from
schema_name.table1 rfslt
LEFT OUTER JOIN schema_name.table2 b ON rfslt.key = b.key
LEFT OUTER JOIN table3 doogal ON REPLACE(rfslt.code_c, ' ', '') = doogal.code_derived
LEFT OUTER JOIN (select *
from schema_name.table6 e
left join schema_name.table7 h on h.id = e.id order by h.change_name) muk ON rfslt.id=muk.id
) prems
WHERE choose1 = 1
) a,
tab_time b
WHERE TRUNC(a.ok_date) = b.g_date(+);
Insert into tab
Select a.*
from table1 a
left join table2 b on (a.col1 = b.col1)
left join table3 c on (a.col2 = c.col2)
inner join table4 d on (b.col3 = d.col3)
Where a.col4 = 'TEST';
Insert into tab
Select case when a.col1 = 'text' then 'Next on top' end d_col1
from (Select * from table1 tbl where tbl.col0 = 'sample') a,
table2 b,
(Select * from table3 tbl3 where col0 in (select col0 from table4 order by col8)) c
Where a.col1 = b.col1(+) and a.col2 = 'TEST' and a.col3 = c.col3;
我已尝试以下代码来达到要求。
import re
# Lexer for the table scan.  Comments and string literals are matched first so
# that keywords, commas or parentheses inside them are never taken for syntax.
_JOIN_SCAN_TOKEN = re.compile(
    r"--[^\n]*"                                 # line comment
    r"|/\*.*?\*/"                               # block comment / optimizer hint
    r"|'(?:[^']|'')*'"                          # string literal ('' escapes a quote)
    r"|[^\W\d][\w$#]*(?:\.[^\W\d][\w$#]*)*"     # keyword or (schema-qualified) name
    r"|[(),]",
    re.DOTALL,
)

# Keywords that start a new statement or clause and therefore end a FROM list.
_JOIN_SCAN_CLAUSE_END = frozenset({
    "SELECT", "INSERT", "UPDATE", "DELETE", "MERGE", "WITH", "SET", "VALUES",
    "WHERE", "GROUP", "ORDER", "HAVING", "CONNECT", "START", "UNION",
    "INTERSECT", "MINUS", "EXCEPT", "FETCH", "OFFSET", "LIMIT", "RETURNING",
})


def extract_join_tables(sql_code):
    """Return the names of the tables referenced by JOIN and FROM clauses.

    The SQL text is tokenised and walked once while tracking parentheses, so
    tables inside derived tables and nested subqueries are found as well, and
    ``FROM`` inside a function call such as ``EXTRACT(YEAR FROM d)`` is not
    mistaken for a clause.

    Args:
        sql_code: SQL / PL-SQL text; may contain several statements.

    Returns:
        A list of table names (aliases are not included): first every table
        that directly follows ``JOIN``, then every table listed after ``FROM``
        (including comma-separated ones), each group in order of appearance.
        Subqueries themselves are not reported, only the tables inside them.
        A table referenced more than once is listed more than once.
    """
    join_tables = []
    from_tables = []
    tokens = [
        tok for tok in _JOIN_SCAN_TOKEN.findall(sql_code)
        if not tok.startswith(("--", "/*", "'"))
    ]

    # One scope per open parenthesis.  "query" is False inside function calls
    # and parenthesised conditions, where FROM/JOIN/commas must be ignored.
    # "target" is the list the next table name belongs to, or None.
    scopes = [{"query": True, "in_from": False, "target": None}]
    for pos, tok in enumerate(tokens):
        scope = scopes[-1]
        following = tokens[pos + 1].upper() if pos + 1 < len(tokens) else ""
        if tok == "(":
            if following in ("SELECT", "WITH"):
                inner = {"query": True, "in_from": False, "target": None}
            elif scope["target"] is not None:
                # Parenthesised join group: FROM (t1 JOIN t2 ON ...)
                inner = {"query": True, "in_from": True,
                         "target": scope["target"]}
            else:
                inner = {"query": False, "in_from": False, "target": None}
            scope["target"] = None
            scopes.append(inner)
        elif tok == ")":
            # An unmatched ")" is ignored instead of raising.
            if len(scopes) > 1:
                scopes.pop()
        elif not scope["query"]:
            continue
        elif tok == ",":
            if scope["in_from"]:
                scope["target"] = from_tables
        else:
            word = tok.upper()
            if word == "FROM":
                scope["in_from"] = True
                scope["target"] = from_tables
            elif word == "JOIN":
                scope["in_from"] = True
                scope["target"] = join_tables
            elif word in _JOIN_SCAN_CLAUSE_END:
                scope["in_from"] = False
                scope["target"] = None
            elif scope["target"] is not None and word not in ("ONLY", "LATERAL"):
                # A name followed by "(" is a table function, not a table.
                if following != "(":
                    scope["target"].append(tok)
                scope["target"] = None
    return join_tables + from_tables
# Demo: run the extractor on a sample INSERT ... SELECT statement and print
# every entry it returns, one per line.
# NOTE(review): the sample below contains one more ")" than "(" (the ")" before
# "a,"); it is kept exactly as given.
plsql_code = """
Insert into tabs
Select * from (Select * from
schema_name.table1 rfslt
LEFT OUTER JOIN schema_name.table2 b ON rfslt.key = b.key
LEFT OUTER JOIN table3 doogal ON REPLACE(rfslt.code_c, ' ', '') = doogal.code_derived
LEFT OUTER JOIN (select *
from schema_name.table6 e
left join schema_name.table7 h on h.id = e.id order by h.change_name) muk ON rfslt.id=muk.id
) prems
WHERE choose1 = 1
) a,
tab_time b
WHERE TRUNC(a.ok_date) = b.g_date(+);
"""
join_tables = extract_join_tables(plsql_code)
print("Join Tables and References:")
for table in join_tables:
    # The loop body must be indented for the snippet to parse.
    print(table)
您的正则表达式存在问题——它们无法从子查询和连接中提取数据。另外,将表划分为 source_tables、join_tables、subquery_table 和 subquery_join_table 的逻辑并不完整。而且,正则表达式的 findall 方法并不能在所有情况下正确返回捕获组。
我的代码可能需要一些小的编辑,但是我认为我涵盖了您要解决的大部分问题:
import re
# Lexer for the table scan.  Comments and string literals are matched first so
# that keywords, commas or parentheses inside them are never taken for syntax.
_TABLE_TOKEN_RE = re.compile(
    r"--[^\n]*"                                 # line comment
    r"|/\*.*?\*/"                               # block comment / optimizer hint
    r"|'(?:[^']|'')*'"                          # string literal ('' escapes a quote)
    r"|[^\W\d][\w$#]*(?:\.[^\W\d][\w$#]*)*"     # keyword or (schema-qualified) name
    r"|[(),]",
    re.DOTALL,
)

# Keywords that start a new statement or clause and therefore end a FROM list.
_TABLE_CLAUSE_END = frozenset({
    "SELECT", "INSERT", "UPDATE", "DELETE", "MERGE", "WITH", "SET", "VALUES",
    "WHERE", "GROUP", "ORDER", "HAVING", "CONNECT", "START", "UNION",
    "INTERSECT", "MINUS", "EXCEPT", "FETCH", "OFFSET", "LIMIT", "RETURNING",
})

# Words that may directly follow a table name without being its alias.
_TABLE_NOT_AN_ALIAS = _TABLE_CLAUSE_END | {
    "ON", "USING", "JOIN", "LEFT", "RIGHT", "FULL", "INNER", "OUTER", "CROSS",
    "NATURAL", "FOR", "PARTITION", "PIVOT", "UNPIVOT", "SAMPLE", "AS",
}

# (is inside a subquery, how the table was introduced) -> result key.
_TABLE_RESULT_KEYS = {
    (False, "from"): "source_tables",
    (False, "join"): "join_tables",
    (True, "from"): "subquery_table",
    (True, "join"): "subquery_join_table",
}


class _TableScope:
    """Parser state for one open parenthesis (or for the statement itself)."""

    __slots__ = ("is_query", "level", "in_from", "expect")

    def __init__(self, is_query, level, in_from=False, expect=None):
        self.is_query = is_query  # False inside function calls / conditions
        self.level = level        # subquery nesting depth, 0 = main query
        self.in_from = in_from    # True while a FROM list is being read
        self.expect = expect      # "from"/"join" when a table name is due


def _table_alias_after(tokens, pos):
    """Return the alias following the table at ``tokens[pos]``, or None."""
    nxt = pos + 1
    if nxt < len(tokens) and tokens[nxt].upper() == "AS":
        nxt += 1
    if nxt < len(tokens):
        candidate = tokens[nxt]
        is_name = candidate[0] == "_" or candidate[0].isalpha()
        if (is_name and "." not in candidate
                and candidate.upper() not in _TABLE_NOT_AN_ALIAS):
            return candidate
    return None


def _iter_table_refs(statement):
    """Yield ``(level, kind, table, alias)`` for each table in *statement*."""
    tokens = [
        tok for tok in _TABLE_TOKEN_RE.findall(statement)
        if not tok.startswith(("--", "/*"))
    ]
    scopes = [_TableScope(True, 0)]
    for pos, tok in enumerate(tokens):
        scope = scopes[-1]
        following = tokens[pos + 1] if pos + 1 < len(tokens) else ""
        if tok == "(":
            if following.upper() in ("SELECT", "WITH"):
                inner = _TableScope(True, scope.level + 1)
            elif scope.expect is not None:
                # Parenthesised join group: FROM (t1 JOIN t2 ON ...)
                inner = _TableScope(True, scope.level, True, scope.expect)
            else:
                inner = _TableScope(False, scope.level)
            scope.expect = None
            scopes.append(inner)
        elif tok == ")":
            # An unmatched ")" is ignored instead of raising.
            if len(scopes) > 1:
                scopes.pop()
        elif not scope.is_query or tok.startswith("'"):
            continue
        elif tok == ",":
            if scope.in_from:
                scope.expect = "from"
        else:
            word = tok.upper()
            if word == "FROM":
                scope.in_from = True
                scope.expect = "from"
            elif word == "JOIN":
                scope.in_from = True
                scope.expect = "join"
            elif word in _TABLE_CLAUSE_END:
                scope.in_from = False
                scope.expect = None
            elif scope.expect is not None and word not in ("ONLY", "LATERAL"):
                # A name followed by "(" is a table function, not a table.
                if following != "(":
                    yield (scope.level, scope.expect, tok,
                           _table_alias_after(tokens, pos))
                scope.expect = None


def extract_tables(plsql_code):
    """Classify the tables referenced by each statement of *plsql_code*.

    The text is split on ``;`` and every non-blank statement is scanned with
    a parenthesis-aware tokenizer, so nested subqueries, function calls and
    string literals do not confuse the classification.

    Args:
        plsql_code: SQL / PL-SQL text holding one or more statements.

    Returns:
        A list with one ``defaultdict(list)`` per statement.  Keys are only
        present when at least one table was found for them:

        * ``'source_tables'`` - tables listed after FROM (or after a comma in
          the FROM list) of the outermost query;
        * ``'join_tables'`` - tables following JOIN in the outermost query;
        * ``'subquery_table'`` / ``'subquery_join_table'`` - the same two
          categories for tables found inside any parenthesised subquery.

        Every entry is formatted ``"table (alias)"``; when no alias is given
        the table name is repeated as the alias.
    """
    # Imported here because the surrounding snippet only imports ``re``.
    from collections import defaultdict

    results = []
    for statement in re.split(r';\s*', plsql_code):
        if not statement.strip():
            continue
        result = defaultdict(list)
        for level, kind, table, alias in _iter_table_refs(statement):
            key = _TABLE_RESULT_KEYS[level > 0, kind]
            result[key].append(f"{table} ({alias or table})")
        results.append(result)
    return results
# Demo: three sample INSERT ... SELECT statements separated by ";".  The
# extractor is run on the whole text and each returned entry is printed.
# NOTE(review): the first statement contains one more ")" than "(" (the ")"
# before "a,"); it is kept exactly as given.
plsql_code = """
Insert into tabs
Select * from (Select * from
schema_name.table1 rfslt
LEFT OUTER JOIN schema_name.table2 b ON rfslt.key = b.key
LEFT OUTER JOIN table3 doogal ON REPLACE(rfslt.code_c, ' ', '') = doogal.code_derived
LEFT OUTER JOIN (select *
from schema_name.table6 e
left join schema_name.table7 h on h.id = e.id order by h.change_name) muk ON rfslt.id=muk.id
) prems
WHERE choose1 = 1
) a,
tab_time b
WHERE TRUNC(a.ok_date) = b.g_date(+);
Insert into tab
Select a.*
from table1 a
left join table2 b on (a.col1 = b.col1)
left join table3 c on (a.col2 = c.col2)
inner join table4 d on (b.col3 = d.col3)
Where a.col4 = 'TEST';
Insert into tab
Select case when a.col1 = 'text' then 'Next on top' end d_col1
from (Select * from table1 tbl where tbl.col0 = 'sample') a,
table2 b,
(Select * from table3 tbl3 where col0 in (select col0 from table4 order by col8)) c
Where a.col1 = b.col1(+) and a.col2 = 'TEST' and a.col3 = c.col3;
"""
results = extract_tables(plsql_code)
for result in results:
    # The loop body must be indented for the snippet to parse.
    print(result)