我们是否可以为 SSIS 包生成文档

问题描述 投票:0回答:1

该包需要让没有安装 Visual Studio 的非技术/业务人员也能理解。为此,我们需要为 SSIS 包的功能编写文档。

python ssis
1个回答
0
投票

使用 Python,我们可以读取包代码(XML 格式)并为所需部分生成文档,例如变量、连接、优先约束(precedence constraints)以及每个组件/可执行文件的详细信息。

import xml.etree.ElementTree as ET
import graphviz
import matplotlib.pyplot as plt
import networkx as nx

def parse_ssis_package_constraints(file_path):
    """Extract the precedence constraints of an SSIS package as dictionaries.

    Args:
        file_path: Path (or file object) of the .dtsx package; passed
            straight to ``xml.etree.ElementTree.parse``.

    Returns:
        A list with one dict per ``DTS:PrecedenceConstraint`` element found
        anywhere in the document, in document order. Each dict has the keys:

        * ``"From"`` / ``"To"``: the ``DTS:From`` / ``DTS:To`` attribute, or
          ``'Unknown Component'`` when the attribute is missing.
        * ``"LogicalAnd"``: the label ``'Logical AND'`` when ``DTS:LogicalAnd``
          is exactly ``'True'``, otherwise ``'Logical OR'``. Note that this
          is a label string, not a boolean.
        * ``"Value"``: the raw ``DTS:Value`` attribute or ``None``.
        * ``"Expression"``: the raw ``DTS:Expression`` attribute or ``None``.
    """
    root = ET.parse(file_path).getroot()

    # Only the DTS namespace is needed to address precedence constraints.
    ns = {'DTS': 'www.microsoft.com/SqlServer/Dts'}
    # ElementTree stores qualified attribute names as "{namespace-uri}local".
    dts = f'{{{ns["DTS"]}}}'

    constraints = []
    for constraint in root.findall('.//DTS:PrecedenceConstraint', ns):
        attrs = constraint.attrib
        is_and = attrs.get(dts + 'LogicalAnd', 'False') == 'True'
        constraints.append({
            "From": attrs.get(dts + 'From', 'Unknown Component'),
            "To": attrs.get(dts + 'To', 'Unknown Component'),
            "LogicalAnd": 'Logical AND' if is_and else 'Logical OR',
            "Value": attrs.get(dts + 'Value'),
            "Expression": attrs.get(dts + 'Expression'),
        })
    return constraints

def create_graph(precedence_constraints):
    """Draw a directed graph of SSIS precedence constraints.

    Args:
        precedence_constraints: Iterable of dicts. Each must contain the
            keys ``"From"`` and ``"To"`` (used as node names). Optional keys:

            * ``"Value"``: ``"1"`` labels the edge "On Failure", ``"2"``
              labels it "On Completion"; anything else is "On Success".
            * ``"LogicalAnd"``: either a boolean, or a string such as
              ``'Logical AND'`` / ``'Logical OR'`` / ``'True'`` / ``'False'``.
              When it denotes OR, " (OR Condition)" is appended to the label.
              A missing key is treated as AND.

    Side effects:
        Draws the graph with networkx/matplotlib and calls ``plt.show()``.
        Nothing is returned.
    """
    value_labels = {"1": "On Failure", "2": "On Completion"}

    G = nx.DiGraph()
    for constraint in precedence_constraints:
        label = value_labels.get(constraint.get("Value"), "On Success")

        # A plain truthiness test is not enough here: label strings such as
        # 'Logical OR' are non-empty and therefore always truthy, which would
        # make the OR marker unreachable. Strings are compared explicitly.
        logical_and = constraint.get("LogicalAnd", True)
        if isinstance(logical_and, str):
            is_or = logical_and.strip().lower() in ("logical or", "false")
        else:
            is_or = not logical_and
        if is_or:
            label += " (OR Condition)"

        G.add_edge(constraint["From"], constraint["To"], label=label)

    # Lay out and render nodes first, then the edge labels on the same layout.
    pos = nx.spring_layout(G)
    nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold')
    edge_labels = nx.get_edge_attributes(G, 'label')
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)

    plt.title("SSIS Precedence Constraints")
    plt.show()

def _describe_execute_sql_task(executable, ns):
    """Return the one-line detail string for an Execute SQL Task element."""
    object_data = executable.find('DTS:ObjectData', ns)
    if object_data is None:
        return "Execute SQL Task: No ObjectData found"

    sql_task_data = object_data.find('SQLTask:SqlTaskData', ns)
    if sql_task_data is None:
        return "Execute SQL Task: No SqlTaskData found inside ObjectData"

    sql_statement = sql_task_data.attrib.get(f'{{{ns["SQLTask"]}}}SqlStatementSource')
    if sql_statement is None:
        return "Execute SQL Task: No SQL statement found"

    # The XML parser normally decodes character references itself, so this
    # only changes text in which a literal "&#xA;" survived parsing
    # (for example a doubly-escaped statement).
    sql_statement = sql_statement.replace('&#xA;', '\n')
    return f"Execute SQL Task: {sql_statement}"


def _describe_pipeline_ports(component, container_tag, port_tag, unnamed):
    """Return "name (description)" strings for a component's inputs or outputs."""
    container = component.find(container_tag)
    if container is None:
        return []
    return [
        f"{port.attrib.get('name', unnamed)} ({port.attrib.get('description', 'No Description')})"
        for port in container.findall(port_tag)
    ]


def _describe_pipeline_components(executable, ns):
    """Return the detail strings for every component of a Data Flow Task element."""
    object_data = executable.find('DTS:ObjectData', ns)
    if object_data is None:
        return ["No ObjectData found for Data Flow Task"]

    # The pipeline elements are looked up without a namespace prefix.
    pipeline = object_data.find('pipeline')
    if pipeline is None:
        return ["No pipeline found in ObjectData"]

    components = pipeline.find('components')
    if components is None:
        return ["No components found in pipeline"]

    details = []
    for component in components.findall('component'):
        component_name = component.attrib.get('name', 'Unnamed Component')
        component_class_id = component.attrib.get('componentClassID', 'Unknown Class ID')
        input_details = _describe_pipeline_ports(component, 'inputs', 'input', 'Unnamed Input')
        output_details = _describe_pipeline_ports(component, 'outputs', 'output', 'Unnamed Output')
        input_str = ', '.join(input_details) if input_details else 'No Inputs'
        output_str = ', '.join(output_details) if output_details else 'No Outputs'
        details.append(
            f"Data Flow Component: {component_name} (Class ID: {component_class_id}, "
            f"Inputs: [{input_str}], Outputs: [{output_str}])"
        )
    return details


def parse_ssis_package(file_path):
    """Parse an SSIS .dtsx package into a plain-data summary.

    Args:
        file_path: Path (or file object) of the .dtsx package; passed
            straight to ``xml.etree.ElementTree.parse``.

    Returns:
        A dict with these keys:

        * ``'Package Name'``: ``DTS:ObjectName`` of the root element.
        * ``'Connections'``: list of ``(name, type, connection_string)``
          tuples for the connection managers directly under the root.
        * ``'Variables'``: list of dicts (``Name``, ``Namespace``,
          ``IncludeInDebugDump``, ``DataType``, ``Value``,
          ``EvaluateAsExpression``, ``Expression``) for the variables
          directly under the root.
        * ``'Execution Order'``: names of all nested ``DTS:Executable``
          elements in document order (this is not derived from the
          precedence constraints).
        * ``'Component Details'``: human-readable strings for executables
          whose type is ``Microsoft.ExecuteSQLTask`` or ``Microsoft.Pipeline``.
        * ``'precedence_constraints'``: human-readable strings, one per
          ``DTS:PrecedenceConstraint`` element.

        Missing attributes are replaced by placeholder strings such as
        ``'Unnamed Package'`` rather than raising.
    """
    root = ET.parse(file_path).getroot()

    ns = {
        'DTS': 'www.microsoft.com/SqlServer/Dts',
        'SQLTask': 'www.microsoft.com/sqlserver/dts/tasks/sqltask',
    }
    # ElementTree stores qualified attribute names as "{namespace-uri}local".
    dts = f'{{{ns["DTS"]}}}'

    # 1. SSIS package name
    package_name = root.attrib.get(dts + 'ObjectName', 'Unnamed Package')

    # 2. Connection managers; the connection string sits on a nested
    #    DTS:ConnectionManager element below the outer one.
    connections = []
    for connection in root.findall('DTS:ConnectionManagers/DTS:ConnectionManager', ns):
        inner = connection.find('.//DTS:ConnectionManager', ns)
        if inner is None:
            conn_str = 'No Connection String'
        else:
            conn_str = inner.attrib.get(dts + 'ConnectionString', 'N/A')
        connections.append((
            connection.attrib.get(dts + 'ObjectName', 'Unnamed Connection'),
            connection.attrib.get(dts + 'CreationName', 'Unknown Type'),
            conn_str,
        ))

    # 3. Package-level variables
    variables = []
    for variable in root.findall('DTS:Variables/DTS:Variable', ns):
        value_element = variable.find('DTS:VariableValue', ns)
        if value_element is None:
            var_value = 'No Value'
            var_data_type = 'Unknown Data Type'
        else:
            var_value = value_element.text
            var_data_type = value_element.attrib.get(dts + 'DataType', 'Unknown')
        variables.append({
            'Name': variable.attrib.get(dts + 'ObjectName', 'Unnamed Variable'),
            'Namespace': variable.attrib.get(dts + 'Namespace', 'Unknown Namespace'),
            'IncludeInDebugDump': variable.attrib.get(dts + 'IncludeInDebugDump', 'N/A'),
            'DataType': var_data_type,
            'Value': var_value,
            'EvaluateAsExpression': variable.attrib.get(dts + 'EvaluateAsExpression', 'False'),
            'Expression': variable.attrib.get(dts + 'Expression'),
        })

    # 4. Executables (document order) and per-type component details
    execution_order = []
    component_details = []
    for executable in root.findall('.//DTS:Executable', ns):
        exec_type = executable.attrib.get(dts + 'ExecutableType', 'Unknown Type')
        if exec_type == 'Microsoft.ExecuteSQLTask':
            component_details.append(_describe_execute_sql_task(executable, ns))
        elif exec_type == 'Microsoft.Pipeline':
            component_details.extend(_describe_pipeline_components(executable, ns))
        execution_order.append(executable.attrib.get(dts + 'ObjectName', 'Unnamed Component'))

    # 5. Precedence constraints as human-readable sentences
    precedence_constraints = []
    for constraint in root.findall('.//DTS:PrecedenceConstraint', ns):
        attrs = constraint.attrib
        from_component = attrs.get(dts + 'From', 'Unknown Component')
        to_component = attrs.get(dts + 'To', 'Unknown Component')
        object_name = attrs.get(dts + 'ObjectName', 'Unnamed Constraint')

        # A missing or unrecognised DTS:Value is reported as success.
        value = attrs.get(dts + 'Value')
        if value == "1":
            condition = 'On Failure'
        elif value == "2":
            condition = 'On Completion'
        else:
            condition = 'On Success'

        expression = attrs.get(dts + 'Expression')
        if expression:
            condition += f' with Expression ({expression})'

        is_and = attrs.get(dts + 'LogicalAnd', 'False') == 'True'
        logical_operator = 'Logical AND' if is_and else 'Logical OR'

        precedence_constraints.append(
            f"Precedence Constraint: {from_component} -> {to_component} "
            f"({condition}, {logical_operator}) [{object_name}]"
        )

    return {
        'Package Name': package_name,
        'Connections': connections,
        'Variables': variables,
        'Execution Order': execution_order,
        'Component Details': component_details,
        'precedence_constraints': precedence_constraints,
    }

def print_documentation(doc):
    """Print a package summary dict to standard output as a bulleted report.

    Args:
        doc: Mapping with the keys 'Package Name', 'Connections' (iterable
            of 3-item sequences: name, type, connection string), 'Variables'
            (iterable of mappings with the keys listed in ``variable_fields``
            below), 'Execution Order', 'Component Details' and
            'precedence_constraints' (iterables of printable items).
    """
    variable_fields = (
        'Name',
        'Namespace',
        'IncludeInDebugDump',
        'DataType',
        'Value',
        'EvaluateAsExpression',
        'Expression',
    )

    print(f"Package Name: {doc['Package Name']}\n")

    print("Connections:")
    for connection in doc['Connections']:
        conn_name, kind, conn_string = connection
        print(f" - Name: {conn_name}, Type: {kind}, Connection String: {conn_string}")

    print("\nVariables:")
    for variable in doc['Variables']:
        rendered = ", ".join(f"{field}: {variable[field]}" for field in variable_fields)
        print(f" - {rendered}")

    # The remaining three sections share one layout: heading, then bullets.
    simple_sections = (
        ("\nExecutables:", 'Execution Order'),
        ("\nComponent Details:", 'Component Details'),
        ("\nprecedence_constraints:", 'precedence_constraints'),
    )
    for heading, key in simple_sections:
        print(heading)
        for item in doc[key]:
            print(f" - {item}")

if __name__ == "__main__":
    import sys

    # Script entry point: print the textual documentation for one package,
    # then draw its precedence-constraint graph.
    # The package path may be given as the first command-line argument; the
    # sample path below is used when no argument is supplied.
    default_ssis_file_path = r"C:\Users\user\Downloads\Integration Services Project7\Integration Services Project7\Integration Services Project7\Q2.dtsx"
    ssis_file_path = sys.argv[1] if len(sys.argv) > 1 else default_ssis_file_path

    documentation = parse_ssis_package(ssis_file_path)
    print_documentation(documentation)

    constraints = parse_ssis_package_constraints(ssis_file_path)
    create_graph(constraints)
© www.soinside.com 2019 - 2024. All rights reserved.