该包需要让没有安装 Visual Studio 的非技术/业务人员也能理解。为此,我们需要记录 SSIS 包的功能。
使用 Python,我们可以读取包代码(XML 格式)并记录所需部分,如变量、连接、优先约束(precedence constraints)以及每个组件/可执行对象的详细信息。
import xml.etree.ElementTree as ET
import graphviz
import matplotlib.pyplot as plt
import networkx as nx
def parse_ssis_package_constraints(file_path):
    """Extract the precedence constraints of an SSIS package as plain dicts.

    Args:
        file_path: Location of the ``.dtsx`` package XML; handed unchanged to
            ``xml.etree.ElementTree.parse``.

    Returns:
        A list with one dict per ``DTS:PrecedenceConstraint`` element found
        below the package root, in document order. Each dict has the keys:

        * ``"From"`` / ``"To"`` - the ``DTS:From`` / ``DTS:To`` attributes,
          or ``'Unknown Component'`` when the attribute is absent.
        * ``"LogicalAnd"`` - the string ``'Logical AND'`` when the
          ``DTS:LogicalAnd`` attribute equals ``'True'``, otherwise
          ``'Logical OR'``.
        * ``"Value"`` - the raw ``DTS:Value`` attribute or ``None``.
        * ``"Expression"`` - the raw ``DTS:Expression`` attribute or ``None``.
    """
    # Namespace map used to resolve the DTS: prefix in the XPath query below.
    ns = {'DTS': 'www.microsoft.com/SqlServer/Dts'}
    root = ET.parse(file_path).getroot()

    def dts_attr(element, name, default=None):
        # Attributes in the package XML are namespace-qualified, so the
        # lookup key must be written in ElementTree's "{uri}local" form.
        return element.attrib.get(f'{{{ns["DTS"]}}}{name}', default)

    constraints = []
    for constraint in root.findall('.//DTS:PrecedenceConstraint', ns):
        logical_and = dts_attr(constraint, 'LogicalAnd', 'False')
        constraints.append({
            "From": dts_attr(constraint, 'From', 'Unknown Component'),
            "To": dts_attr(constraint, 'To', 'Unknown Component'),
            "LogicalAnd": 'Logical AND' if logical_and == 'True' else 'Logical OR',
            "Value": dts_attr(constraint, 'Value'),
            "Expression": dts_attr(constraint, 'Expression'),
        })
    return constraints
def create_graph(precedence_constraints):
    """Draw precedence constraints as a labelled directed graph and show it.

    Args:
        precedence_constraints: Iterable of dicts, each with ``"From"`` and
            ``"To"`` keys naming the connected tasks. The optional ``"Value"``
            key selects the edge label (``"1"`` -> On Failure, ``"2"`` ->
            On Completion, anything else -> On Success). The optional
            ``"LogicalAnd"`` key marks OR constraints; it may be a boolean or
            one of the strings ``'Logical OR'`` / ``'Logical AND'``.

    Returns:
        None. The graph is rendered with matplotlib and displayed via
        ``plt.show()``.
    """
    condition_labels = {"1": "On Failure", "2": "On Completion"}
    # String spellings that mean "this constraint is combined with OR".
    or_markers = ("logical or", "or", "false")

    G = nx.DiGraph()
    for constraint in precedence_constraints:
        label = condition_labels.get(constraint.get("Value"), "On Success")

        # A plain truthiness test is not enough: a non-empty string such as
        # 'Logical OR' is truthy, so string markers must be compared by value.
        logical_and = constraint.get("LogicalAnd", True)
        is_or = not logical_and or (
            isinstance(logical_and, str)
            and logical_and.strip().lower() in or_markers
        )
        if is_or:
            label += " (OR Condition)"

        G.add_edge(constraint["From"], constraint["To"], label=label)

    # Lay out the nodes, then draw nodes/edges followed by the edge labels.
    pos = nx.spring_layout(G)
    nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=3000, font_size=10, font_weight='bold')
    edge_labels = nx.get_edge_attributes(G, 'label')
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)

    plt.title("SSIS Precedence Constraints")
    plt.show()
def _dts_attr(element, ns, name, default=None):
    """Return the DTS-namespaced attribute *name* of *element*, or *default*."""
    return element.attrib.get(f'{{{ns["DTS"]}}}{name}', default)


def _collect_connections(root, ns):
    """Return a (name, type, connection string) tuple per connection manager."""
    connections = []
    for connection in root.findall('DTS:ConnectionManagers/DTS:ConnectionManager', ns):
        conn_name = _dts_attr(connection, ns, 'ObjectName', 'Unnamed Connection')
        conn_type = _dts_attr(connection, ns, 'CreationName', 'Unknown Type')
        # The connection string is read from a DTS:ConnectionManager element
        # nested somewhere below the outer one.
        inner = connection.find('.//DTS:ConnectionManager', ns)
        if inner is not None:
            conn_str = _dts_attr(inner, ns, 'ConnectionString', 'N/A')
        else:
            conn_str = 'No Connection String'
        connections.append((conn_name, conn_type, conn_str))
    return connections


def _collect_variables(root, ns):
    """Return one descriptive dict per package-level DTS:Variable."""
    variables = []
    for variable in root.findall('DTS:Variables/DTS:Variable', ns):
        value_element = variable.find('DTS:VariableValue', ns)
        if value_element is not None:
            var_value = value_element.text
            var_data_type = _dts_attr(value_element, ns, 'DataType', 'Unknown')
        else:
            var_value = 'No Value'
            var_data_type = 'Unknown Data Type'
        variables.append({
            'Name': _dts_attr(variable, ns, 'ObjectName', 'Unnamed Variable'),
            'Namespace': _dts_attr(variable, ns, 'Namespace', 'Unknown Namespace'),
            'IncludeInDebugDump': _dts_attr(variable, ns, 'IncludeInDebugDump', 'N/A'),
            'DataType': var_data_type,
            'Value': var_value,
            'EvaluateAsExpression': _dts_attr(variable, ns, 'EvaluateAsExpression', 'False'),
            'Expression': _dts_attr(variable, ns, 'Expression', None),
        })
    return variables


def _describe_sql_task(executable, ns):
    """Return the description line for an Execute SQL Task executable."""
    object_data = executable.find('DTS:ObjectData', ns)
    if object_data is None:
        return "Execute SQL Task: No ObjectData found"
    sql_task_data = object_data.find('SQLTask:SqlTaskData', ns)
    if sql_task_data is None:
        return "Execute SQL Task: No SqlTaskData found inside ObjectData"
    sql_statement = sql_task_data.attrib.get(f'{{{ns["SQLTask"]}}}SqlStatementSource', None)
    if sql_statement is None:
        return "Execute SQL Task: No SQL statement found"
    # No manual decoding is needed here: the XML parser has already turned
    # character references in the attribute (such as encoded line feeds)
    # into the real characters.
    return f"Execute SQL Task: {sql_statement}"


def _summarize_ports(component, container_tag, port_tag, unnamed, none_text, ns):
    """Return 'name (description), ...' for a component's inputs or outputs."""
    container = component.find(container_tag, ns)
    ports = []
    if container is not None:
        for port in container.findall(port_tag, ns):
            port_name = port.attrib.get('name', unnamed)
            port_description = port.attrib.get('description', 'No Description')
            ports.append(f"{port_name} ({port_description})")
    return ', '.join(ports) if ports else none_text


def _describe_pipeline(executable, ns):
    """Return the description lines for a Data Flow Task executable."""
    object_data = executable.find('DTS:ObjectData', ns)
    if object_data is None:
        return ["No ObjectData found for Data Flow Task"]
    pipeline = object_data.find('pipeline', ns)
    if pipeline is None:
        return ["No pipeline found in ObjectData"]
    components = pipeline.find('components', ns)
    if components is None:
        return ["No components found in pipeline"]

    details = []
    for component in components.findall('component', ns):
        component_name = component.attrib.get('name', 'Unnamed Component')
        component_class_id = component.attrib.get('componentClassID', 'Unknown Class ID')
        input_str = _summarize_ports(component, 'inputs', 'input', 'Unnamed Input', 'No Inputs', ns)
        output_str = _summarize_ports(component, 'outputs', 'output', 'Unnamed Output', 'No Outputs', ns)
        details.append(f"Data Flow Component: {component_name} (Class ID: {component_class_id}, Inputs: [{input_str}], Outputs: [{output_str}])")
    return details


def _describe_constraints(root, ns):
    """Return one human-readable line per DTS:PrecedenceConstraint."""
    lines = []
    for constraint in root.findall('.//DTS:PrecedenceConstraint', ns):
        from_component = _dts_attr(constraint, ns, 'From', 'Unknown Component')
        to_component = _dts_attr(constraint, ns, 'To', 'Unknown Component')
        object_name = _dts_attr(constraint, ns, 'ObjectName', 'Unnamed Constraint')

        # A missing (or unrecognised) DTS:Value is reported as success.
        value = _dts_attr(constraint, ns, 'Value', None)
        if value == "1":
            condition = 'On Failure'
        elif value == "2":
            condition = 'On Completion'
        else:
            condition = 'On Success'

        logical_and = _dts_attr(constraint, ns, 'LogicalAnd', 'False')
        logical_operator = 'Logical AND' if logical_and == 'True' else 'Logical OR'

        expression = _dts_attr(constraint, ns, 'Expression', None)
        if expression:
            condition += f' with Expression ({expression})'

        lines.append(f"Precedence Constraint: {from_component} -> {to_component} ({condition}, {logical_operator}) [{object_name}]")
    return lines


def parse_ssis_package(file_path):
    """Parse an SSIS ``.dtsx`` package and summarise it for documentation.

    Args:
        file_path: Location of the package XML; handed unchanged to
            ``xml.etree.ElementTree.parse``.

    Returns:
        A dict with the keys:

        * ``'Package Name'`` - the root element's ``DTS:ObjectName``
          (``'Unnamed Package'`` when absent).
        * ``'Connections'`` - list of ``(name, type, connection string)``
          tuples.
        * ``'Variables'`` - list of dicts with the keys ``Name``,
          ``Namespace``, ``IncludeInDebugDump``, ``DataType``, ``Value``,
          ``EvaluateAsExpression`` and ``Expression``.
        * ``'Execution Order'`` - names of every nested ``DTS:Executable``
          in document order.
        * ``'Component Details'`` - description strings for Execute SQL
          tasks and data-flow components.
        * ``'precedence_constraints'`` - one readable string per
          precedence constraint.
    """
    root = ET.parse(file_path).getroot()

    # Namespace map used to resolve the prefixes in the XPath queries.
    ns = {
        'DTS': 'www.microsoft.com/SqlServer/Dts',
        'SQLTask': 'www.microsoft.com/sqlserver/dts/tasks/sqltask',
    }

    execution_order = []
    component_details = []
    for executable in root.findall('.//DTS:Executable', ns):
        exec_type = _dts_attr(executable, ns, 'ExecutableType', 'Unknown Type')
        if exec_type == 'Microsoft.ExecuteSQLTask':
            component_details.append(_describe_sql_task(executable, ns))
        elif exec_type == 'Microsoft.Pipeline':
            component_details.extend(_describe_pipeline(executable, ns))
        # Every executable is listed, whether or not it has detail lines.
        execution_order.append(_dts_attr(executable, ns, 'ObjectName', 'Unnamed Component'))

    return {
        'Package Name': _dts_attr(root, ns, 'ObjectName', 'Unnamed Package'),
        'Connections': _collect_connections(root, ns),
        'Variables': _collect_variables(root, ns),
        'Execution Order': execution_order,
        'Component Details': component_details,
        'precedence_constraints': _describe_constraints(root, ns),
    }
def print_documentation(doc):
    """Print the package summary produced by ``parse_ssis_package``.

    Args:
        doc: Mapping with the keys ``'Package Name'``, ``'Connections'``
            (iterable of 3-item sequences), ``'Variables'`` (iterable of
            dicts), ``'Execution Order'``, ``'Component Details'`` and
            ``'precedence_constraints'`` (iterables of printable items).

    Returns:
        None. Everything is written to standard output.
    """
    print(f"Package Name: {doc['Package Name']}\n")

    print("Connections:")
    for connection in doc['Connections']:
        label, kind, conn_string = connection
        print(f" - Name: {label}, Type: {kind}, Connection String: {conn_string}")

    print("\nVariables:")
    for var in doc['Variables']:
        print(f" - Name: {var['Name']}, Namespace: {var['Namespace']}, IncludeInDebugDump: {var['IncludeInDebugDump']}, DataType: {var['DataType']}, Value: {var['Value']}, EvaluateAsExpression: {var['EvaluateAsExpression']}, Expression: {var['Expression']}")

    # The remaining sections are plain bullet lists: a heading followed by
    # one line per entry stored under the given key.
    bullet_sections = (
        ("\nExecutables:", 'Execution Order'),
        ("\nComponent Details:", 'Component Details'),
        ("\nprecedence_constraints:", 'precedence_constraints'),
    )
    for heading, key in bullet_sections:
        print(heading)
        for entry in doc[key]:
            print(f" - {entry}")
if __name__ == "__main__":
    import sys

    # Package documented when no argument is given. Pass another .dtsx path
    # as the first command-line argument to document a different package.
    default_ssis_file_path = r"C:\Users\user\Downloads\Integration Services Project7\Integration Services Project7\Integration Services Project7\Q2.dtsx"
    ssis_file_path = sys.argv[1] if len(sys.argv) > 1 else default_ssis_file_path

    # Text report first, then the precedence-constraint graph.
    documentation = parse_ssis_package(ssis_file_path)
    print_documentation(documentation)
    constraints = parse_ssis_package_constraints(ssis_file_path)
    create_graph(constraints)