使用 python 从 PL/SQL 代码中查找源、联接、子查询表

问题描述 投票:0回答:1

我有一个plsql代码,

我想要一个Python代码来帮助我找到源、连接和子查询表以及子查询连接表。请考虑 plsql 代码将根据要求进行更改。

输出就像,

{'source_table': ['schema_name.table1 (rfslt)'], 
 'join_tables': ['schema_name.table2 (b)', 'table3 (doogal)', 'tab_time (d)'],
 'subquery_table': ['schema_name.table6 (e)'],
 'subquery_join_table': ['schema_name.table7 (h)']}
{'source_tables': ['table1 (a)'], 
 'join_tables': ['table2 (b)', 'table3 (c)', 'table4 (d)']}
{'source_tables': ['table1 (a)'], 
 'join_tables': ['table2 (b)', 'table3 (c)']}

下面是 plsql 代码,我需要从中找到上面提到的输出。

Insert into tabs 
Select * from (Select * from 
schema_name.table1 rfslt
LEFT OUTER JOIN schema_name.table2 b ON rfslt.key = b.key
LEFT OUTER JOIN table3 doogal ON REPLACE(rfslt.code_c, ' ', '') = doogal.code_derived
LEFT OUTER JOIN (select *
    from schema_name.table6 e
    left join schema_name.table7 h on h.id = e.id order by h.change_name) muk ON rfslt.id=muk.id
) prems
WHERE choose1 = 1
) a,
tab_time b
WHERE TRUNC(a.ok_date) = b.g_date(+);

Insert into tab 
Select a.* 
from table1 a
left join table2 b on (a.col1 = b.col1)
left join table3 c on (a.col2 = c.col2)
inner join table4 d on (b.col3 = d.col3)
Where a.col4 = 'TEST';

Insert into tab 
Select case when a.col1 = 'text' then 'Next on top' end d_col1 
from (Select * from table1 tbl where tbl.col0 = 'sample') a, 
table2 b, 
(Select * from table3 tbl3 where col0 in (select col0 from table4 order by col8) c 
Where a.col1 = b.col1(+) and a.col2 = 'TEST' and a.col3 = c.col3;

我已尝试以下代码来达到要求。

import re

def extract_join_tables(sql_code):
    """Return the plain table names that follow JOIN and FROM keywords.

    The previous version captured only the optional alias in each pattern,
    so ``findall`` returned bare strings and ``match[0]`` was the first
    character of the alias rather than a table name.  The table name is now
    the captured group itself.

    Args:
        sql_code: SQL / PL-SQL text to scan.

    Returns:
        A list of table names (schema-qualified when written that way):
        first every table that follows a JOIN keyword, then every table that
        follows a FROM keyword, each group in order of appearance.  A
        parenthesised subquery in table position is not reported itself, but
        the tables referenced inside it are.  Additional tables of a
        comma-separated FROM list are not reported.
    """
    # The character class cannot start on "(", so "JOIN (select ..." and
    # "FROM (select ..." are skipped while the scan carries on inside them.
    join_pattern = re.compile(r'\bJOIN\s+([^\s(),;]+)', re.IGNORECASE)
    from_pattern = re.compile(r'\bFROM\s+([^\s(),;]+)', re.IGNORECASE)

    # With exactly one capture group, findall yields the captured names.
    join_tables = join_pattern.findall(sql_code)
    join_tables.extend(from_pattern.findall(sql_code))
    return join_tables

# Sample INSERT ... SELECT statement used to exercise extract_join_tables.
plsql_code = """
Insert into tabs 
Select * from (Select * from 
schema_name.table1 rfslt
LEFT OUTER JOIN schema_name.table2 b ON rfslt.key = b.key
LEFT OUTER JOIN table3 doogal ON REPLACE(rfslt.code_c, ' ', '') = doogal.code_derived
LEFT OUTER JOIN (select *
    from schema_name.table6 e
    left join schema_name.table7 h on h.id = e.id order by h.change_name) muk ON rfslt.id=muk.id
) prems
WHERE choose1 = 1
) a,
tab_time b
WHERE TRUNC(a.ok_date) = b.g_date(+);
"""

join_tables = extract_join_tables(plsql_code)

# Header first, then one extracted entry per line (nothing when the list is empty).
print("Join Tables and References:")
if join_tables:
    print(*join_tables, sep="\n")
python plsql
1个回答
0
投票

您的正则表达式存在问题——它们无法从子查询和连接中获取数据。另外,将表划分为 source_tables、join_tables、subquery_table 和 subquery_join_table 的逻辑并不完整。此外,与正则表达式一起使用的 `findall` 方法并不能在所有情况下正确返回捕获组:当模式中只有一个捕获组时,它返回的是字符串列表而不是元组,因此 `match[0]` 取到的只是第一个字符,而不是表名。

我的代码可能需要一些小的编辑,但是我认为我涵盖了您要解决的大部分问题:

import re

# Words that end a FROM list and must never be mistaken for a table alias.
_CLAUSE_KEYWORDS = frozenset({
    'CONNECT', 'CROSS', 'EXCEPT', 'FETCH', 'FOR', 'FROM', 'FULL', 'GROUP',
    'HAVING', 'INNER', 'INTERSECT', 'JOIN', 'LEFT', 'LIMIT', 'MINUS',
    'NATURAL', 'OFFSET', 'ON', 'ORDER', 'OUTER', 'PIVOT', 'RETURNING',
    'RIGHT', 'SELECT', 'START', 'UNION', 'UNPIVOT', 'USING', 'WHERE',
    'WINDOW',
})

# String literals ('' is an escaped quote), line comments and block comments.
_NOISE = re.compile(r"'(?:[^']|'')*'|--[^\n]*|/\*.*?\*/", re.DOTALL)
# Opening parenthesis of a subquery.
_SUBQUERY_OPEN = re.compile(r'\(\s*SELECT\b', re.IGNORECASE)
# "[schema.]table [[AS] alias]"; group 1 is the table, group 2 the alias.
_TABLE_REF = re.compile(r'(\w+(?:\.\w+)*)(?:\s+(?:AS\s+)?(\w+))?', re.IGNORECASE)
# The same reference directly after a JOIN keyword (same group numbers).
_JOIN_REF = re.compile(r'\bJOIN\s+' + _TABLE_REF.pattern, re.IGNORECASE)
# Everything between FROM and the next clause keyword (or end of text).
_FROM_LIST = re.compile(
    r'\bFROM\s+(.*?)(?=\b(?:' + '|'.join(sorted(_CLAUSE_KEYWORDS)) + r')\b|\Z)',
    re.IGNORECASE | re.DOTALL,
)


def _strip_noise(sql):
    """Blank out string literals and comments so their content is never parsed."""
    return _NOISE.sub(lambda m: "''" if m.group().startswith("'") else ' ', sql)


def _cut_subqueries(text):
    """Split text into (flat_text, subqueries) at its outermost "(SELECT ...)" groups.

    Each subquery is replaced by the placeholder "()" in flat_text, and its
    inner text (without the enclosing parentheses) is returned separately.
    """
    flat_parts = []
    subqueries = []
    pos = 0
    while True:
        opening = _SUBQUERY_OPEN.search(text, pos)
        if opening is None:
            break
        start = opening.start()
        # Walk to the parenthesis that balances the opening one.
        depth = 0
        end = None
        for idx in range(start, len(text)):
            char = text[idx]
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth == 0:
                    end = idx
                    break
        if end is None:
            # Unbalanced input: treat the remainder as the subquery body.
            end = len(text)
        flat_parts.append(text[pos:start])
        flat_parts.append('()')
        subqueries.append(text[start + 1:end])
        pos = end + 1
    flat_parts.append(text[pos:])
    return ''.join(flat_parts), subqueries


def _describe(ref):
    """Format a table-reference match as "table (alias)"."""
    table, alias = ref.group(1), ref.group(2)
    if not alias or alias.upper() in _CLAUSE_KEYWORDS:
        # No alias given (or a keyword such as ON was picked up): reuse the name.
        alias = table
    return f"{table} ({alias})"


def _collect(text, from_key, join_key, result):
    """Record the tables of one query level, then recurse into its subqueries."""
    flat, nested = _cut_subqueries(text)

    for from_list in _FROM_LIST.findall(flat):
        for position, item in enumerate(from_list.split(',')):
            ref = _TABLE_REF.match(item.strip())
            if ref is None:
                # Derived table placeholder "()": reported via the recursion.
                continue
            # The first FROM item is the source; comma-joined ones are joins.
            key = from_key if position == 0 else join_key
            result[key].append(_describe(ref))

    for join in _JOIN_REF.finditer(flat):
        result[join_key].append(_describe(join))

    for subquery in nested:
        _collect(subquery, 'subquery_table', 'subquery_join_table', result)


def extract_tables(plsql_code):
    """Classify the tables referenced by each statement of a SQL script.

    Fixes over the previous version: ``defaultdict`` is now imported;
    subqueries are located by balancing parentheses instead of a lazy regex
    that stopped at the first ")" and whose ``findall`` result was indexed
    as if it were a tuple (which removed single characters from the
    statement); SQL keywords are no longer reported as aliases; tables that
    are comma-joined in a FROM list are reported; string literals and
    comments are ignored.

    Args:
        plsql_code: One or more SQL statements separated by ";".

    Returns:
        A list with one ``defaultdict(list)`` per non-blank statement.  Keys
        are created only when something was found:

        * ``source_tables``: first table of a FROM list of the main query.
        * ``join_tables``: tables after JOIN, or comma-joined in FROM, in
          the main query.
        * ``subquery_table`` / ``subquery_join_table``: the same two roles
          for tables found inside parenthesised subqueries at any depth.

        Every entry is formatted ``"table (alias)"``; a table without alias
        uses its own name as alias.

    Limitations: quoted identifiers, database links and PL/SQL blocks that
    contain ";" inside a single statement are not understood.
    """
    from collections import defaultdict

    results = []
    for statement in re.split(r';\s*', _strip_noise(plsql_code)):
        if not statement.strip():
            continue
        result = defaultdict(list)
        _collect(statement, 'source_tables', 'join_tables', result)
        results.append(result)
    return results

# Three sample INSERT ... SELECT statements: nested subqueries with explicit
# joins, a plain chain of joins, and old-style comma joins with (+) markers.
plsql_code = """
Insert into tabs 
Select * from (Select * from 
schema_name.table1 rfslt
LEFT OUTER JOIN schema_name.table2 b ON rfslt.key = b.key
LEFT OUTER JOIN table3 doogal ON REPLACE(rfslt.code_c, ' ', '') = doogal.code_derived
LEFT OUTER JOIN (select *
    from schema_name.table6 e
    left join schema_name.table7 h on h.id = e.id order by h.change_name) muk ON rfslt.id=muk.id
) prems
WHERE choose1 = 1
) a,
tab_time b
WHERE TRUNC(a.ok_date) = b.g_date(+);

Insert into tab 
Select a.* 
from table1 a
left join table2 b on (a.col1 = b.col1)
left join table3 c on (a.col2 = c.col2)
inner join table4 d on (b.col3 = d.col3)
Where a.col4 = 'TEST';

Insert into tab 
Select case when a.col1 = 'text' then 'Next on top' end d_col1 
from (Select * from table1 tbl where tbl.col0 = 'sample') a, 
table2 b, 
(Select * from table3 tbl3 where col0 in (select col0 from table4 order by col8)) c 
Where a.col1 = b.col1(+) and a.col2 = 'TEST' and a.col3 = c.col3;
"""

results = extract_tables(plsql_code)

# One line of output per parsed statement (nothing when there are none).
if results:
    print(*results, sep="\n")
© www.soinside.com 2019 - 2024. All rights reserved.