使用ADF将14GB文件从FTP复制到Azure Data Lake Store

问题描述 投票:-1回答:1

我试图使用Azure数据工厂(ADF)将一个14GB的文件从FTP复制到我的Azure Data Lake Store。当我执行管道时,它开始复制文件,并在半小时内复制了将近13.9GB。

剩余的数据即使在管道运行8小时后仍未复制完成,最终因提示“文件不可用”而失败。文件不可用的原因是源端团队为了放置下一个文件而删除了该文件。

已将数据集成单元(DIU)增加到250

{
    "name": "job_fa",
    "properties": {
        "activities": [
            {
                "name": "set_parameters_adh_or_sch",
                "description": "validate and set the parameter values based on the runtype sch or adh",
                "type": "Lookup",
                "dependsOn": [
                    {
                        "activity": "br_bs_loggin",
                        "dependencyConditions": [
                            "Succeeded"
                        ]
                    }
                ],
                "policy": {
                    "timeout": "7.00:00:00",
                    "retry": 0,
                    "retryIntervalInSeconds": 30,
                    "secureOutput": false,
                    "secureInput": false
                },
                "userProperties": [
                    {
                        "name": "CheckLookup1",
                        "value": "1"
                    }
                ],
                "typeProperties": {
                    "source": {
                        "type": "SqlSource",
                        "sqlReaderStoredProcedureName": "[dbo].[usp_FeedParameters_main]",
                        "storedProcedureParameters": {
                            "FeedName_in": {
                                "type": "String",
                                "value": {
                                    "value": "@pipeline().parameters.p_FeedName",
                                    "type": "Expression"
                                }
                            },
                            "RunType_in": {
                                "type": "String",
                                "value": {
                                    "value": "@pipeline().parameters.p_RunType",
                                    "type": "Expression"
                                }
                            },
                            "SrcEnddate_in": {
                                "type": "String",
                                "value": {
                                    "value": "@pipeline().parameters.p_SrcEndDate",
                                    "type": "Expression"
                                }
                            },
                            "SrcStartdate_in": {
                                "type": "String",
                                "value": {
                                    "value": "@pipeline().parameters.p_SrcStartDate",
                                    "type": "Expression"
                                }
                            },
                            "TgtDate_in": {
                                "type": "String",
                                "value": {
                                    "value": "@pipeline().parameters.p_TargetDate",
                                    "type": "Expression"
                                }
                            },
                            "SrcHour_in": {
                                "type": "String",
                                "value": {
                                    "value": "@pipeline().parameters.p_SrcHour",
                                    "type": "Expression"
                                }
                            },
                            "TgtHour_in": {
                                "type": "String",
                                "value": {
                                    "value": "@pipeline().parameters.p_TgtHour",
                                    "type": "Expression"
                                }
                            }
                        }
                    },
                    "dataset": {
                        "referenceName": "AzureSql_cdpconfiguser",
                        "type": "DatasetReference"
                    },
                    "firstRowOnly": true
                }
            },
            {
                "name": "br_bs_loggin",
                "description": "insert into the batch run and update the batch scheduler to started in case of sch run",
                "type": "Lookup",
                "policy": {
                    "timeout": "7.00:00:00",
                    "retry": 0,
                    "retryIntervalInSeconds": 30,
                    "secureOutput": false,
                    "secureInput": false
                },
                "typeProperties": {
                    "source": {
                        "type": "SqlSource",
                        "sqlReaderStoredProcedureName": "[dbo].[usp_BatchRun]",
                        "storedProcedureParameters": {
                            "FeedName_in": {
                                "type": "String",
                                "value": {
                                    "value": "@pipeline().parameters.p_FeedName",
                                    "type": "Expression"
                                }
                            },
                            "RunType_in": {
                                "type": "String",
                                "value": {
                                    "value": "@pipeline().parameters.p_RunType",
                                    "type": "Expression"
                                }
                            },
                            "Status_in": {
                                "type": "String",
                                "value": "Started"
                            }
                        }
                    },
                    "dataset": {
                        "referenceName": "AzureSql_cdpconfiguser",
                        "type": "DatasetReference"
                    },
                    "firstRowOnly": true
                }
            },
            {
                "name": "Check if file exists in target",
                "type": "GetMetadata",
                "dependsOn": [
                    {
                        "activity": "Copy Data WT to ADLS",
                        "dependencyConditions": [
                            "Succeeded"
                        ]
                    }
                ],
                "policy": {
                    "timeout": "7.00:00:00",
                    "retry": 0,
                    "retryIntervalInSeconds": 30,
                    "secureOutput": false,
                    "secureInput": false
                },
                "typeProperties": {
                    "dataset": {
                        "referenceName": "AzureDataLakeStoreFile_wt_tgt_path_and_name",
                        "type": "DatasetReference",
                        "parameters": {
                            "TgtFilePath": {
                                "value": "@activity('set_parameters_adh_or_sch').output.firstrow.TgtFilePath_wt_dt_out",
                                "type": "Expression"
                            },
                            "TgtFileName": {
                                "value": "@activity('set_parameters_adh_or_sch').output.firstrow.TgtFileName_wt_dt_out",
                                "type": "Expression"
                            }
                        }
                    },
                    "fieldList": [
                        "exists",
                        "size"
                    ]
                }
            },
            {
                "name": "Copy Data WT to ADLS",
                "type": "Copy",
                "dependsOn": [
                    {
                        "activity": "set_parameters_adh_or_sch",
                        "dependencyConditions": [
                            "Succeeded"
                        ]
                    }
                ],
                "policy": {
                    "timeout": "7.00:00:00",
                    "retry": 0,
                    "retryIntervalInSeconds": 30,
                    "secureOutput": false,
                    "secureInput": false
                },
                "userProperties": [
                    {
                        "name": "Source",
                        "value": "@{activity('set_parameters_adh_or_sch').output.firstrow.SrcFilePath_wo_dt_out}/@{activity('set_parameters_adh_or_sch').output.firstrow.SrcFileName_wt_dt_out}"
                    },
                    {
                        "name": "Destination",
                        "value": "@{activity('set_parameters_adh_or_sch').output.firstrow.TgtFilePath_wt_dt_out}/@{activity('set_parameters_adh_or_sch').output.firstrow.TgtFileName_wt_dt_out}"
                    }
                ],
                "typeProperties": {
                    "source": {
                        "type": "FileSystemSource",
                        "recursive": true
                    },
                    "sink": {
                        "type": "AzureDataLakeStoreSink"
                    },
                    "enableStaging": false,
                    "dataIntegrationUnits": 0
                },
                "inputs": [
                    {
                        "referenceName": "FTP_SRC_FA",
                        "type": "DatasetReference",
                        "parameters": {
                            "SrcFileName": "@activity('set_parameters_adh_or_sch').output.firstrow.SrcFileName_wt_dt_out",
                            "SrcFilePath": "@activity('set_parameters_adh_or_sch').output.firstrow.SrcFilePath_wo_dt_out"
                        }
                    }
                ],
                "outputs": [
                    {
                        "referenceName": "AzureDataLakeStoreFile_wt_tgt_path_and_name",
                        "type": "DatasetReference",
                        "parameters": {
                            "TgtFilePath": {
                                "value": "@activity('set_parameters_adh_or_sch').output.firstrow.TgtFilePath_wt_dt_out",
                                "type": "Expression"
                            },
                            "TgtFileName": {
                                "value": "@activity('set_parameters_adh_or_sch').output.firstrow.TgtFileName_wt_dt_out",
                                "type": "Expression"
                            }
                        }
                    }
                ]
            },
            {
                "name": "br_bs_update_failed",
                "type": "SqlServerStoredProcedure",
                "dependsOn": [
                    {
                        "activity": "Copy Data WT to ADLS",
                        "dependencyConditions": [
                            "Failed"
                        ]
                    }
                ],
                "policy": {
                    "timeout": "7.00:00:00",
                    "retry": 0,
                    "retryIntervalInSeconds": 30,
                    "secureOutput": false,
                    "secureInput": false
                },
                "typeProperties": {
                    "storedProcedureName": "[dbo].[usp_BatchRunUpdate]",
                    "storedProcedureParameters": {
                        "BatchId": {
                            "value": {
                                "value": "@activity('br_bs_loggin').output.firstrow.Batchid_out",
                                "type": "Expression"
                            },
                            "type": "String"
                        },
                        "FeedID": {
                            "value": {
                                "value": "@activity('br_bs_loggin').output.firstrow.FeedId_out",
                                "type": "Expression"
                            },
                            "type": "Int32"
                        },
                        "FeedRunId": {
                            "value": {
                                "value": "@activity('br_bs_loggin').output.firstrow.BatchRunId_out",
                                "type": "Expression"
                            },
                            "type": "Int32"
                        },
                        "Status": {
                            "value": "Failed",
                            "type": "String"
                        }
                    }
                },
                "linkedServiceName": {
                    "referenceName": "AzureSqlDatabase1_cdp_dev_sql_db_appconfig",
                    "type": "LinkedServiceReference"
                }
            },
            {
                "name": "If Condition1",
                "type": "IfCondition",
                "dependsOn": [
                    {
                        "activity": "Check if file exists in target",
                        "dependencyConditions": [
                            "Succeeded"
                        ]
                    }
                ],
                "typeProperties": {
                    "expression": {
                        "value": "@equals(activity('Check if file exists in target').output.Exists,true)",
                        "type": "Expression"
                    },
                    "ifFalseActivities": [
                        {
                            "name": "Stored Procedure_failed",
                            "type": "SqlServerStoredProcedure",
                            "policy": {
                                "timeout": "7.00:00:00",
                                "retry": 0,
                                "retryIntervalInSeconds": 30,
                                "secureOutput": false,
                                "secureInput": false
                            },
                            "typeProperties": {
                                "storedProcedureName": "[dbo].[usp_BatchRunUpdate]",
                                "storedProcedureParameters": {
                                    "BatchId": {
                                        "value": {
                                            "value": "@activity('br_bs_loggin').output.firstrow.Batchid_out",
                                            "type": "Expression"
                                        },
                                        "type": "String"
                                    },
                                    "FeedID": {
                                        "value": {
                                            "value": "@activity('br_bs_loggin').output.firstrow.FeedId_out",
                                            "type": "Expression"
                                        },
                                        "type": "Int32"
                                    },
                                    "FeedRunId": {
                                        "value": {
                                            "value": "@activity('br_bs_loggin').output.firstrow.BatchRunId_out",
                                            "type": "Expression"
                                        },
                                        "type": "Int32"
                                    },
                                    "Status": {
                                        "value": "Failed",
                                        "type": "String"
                                    }
                                }
                            },
                            "linkedServiceName": {
                                "referenceName": "AzureSqlDatabase1_cdp_dev_sql_db_appconfig",
                                "type": "LinkedServiceReference"
                            }
                        }
                    ],
                    "ifTrueActivities": [
                        {
                            "name": "Stored Procedure1",
                            "type": "SqlServerStoredProcedure",
                            "policy": {
                                "timeout": "7.00:00:00",
                                "retry": 0,
                                "retryIntervalInSeconds": 30,
                                "secureOutput": false,
                                "secureInput": false
                            },
                            "typeProperties": {
                                "storedProcedureName": "[dbo].[usp_BatchRunUpdate]",
                                "storedProcedureParameters": {
                                    "BatchId": {
                                        "value": {
                                            "value": "@activity('br_bs_loggin').output.firstrow.Batchid_out",
                                            "type": "Expression"
                                        },
                                        "type": "String"
                                    },
                                    "FeedID": {
                                        "value": {
                                            "value": "@activity('br_bs_loggin').output.firstrow.FeedId_out",
                                            "type": "Expression"
                                        },
                                        "type": "Int32"
                                    },
                                    "FeedRunId": {
                                        "value": {
                                            "value": "@activity('br_bs_loggin').output.firstrow.BatchRunId_out",
                                            "type": "Expression"
                                        },
                                        "type": "Int32"
                                    },
                                    "Status": {
                                        "value": "Succeeded",
                                        "type": "String"
                                    }
                                }
                            },
                            "linkedServiceName": {
                                "referenceName": "AzureSqlDatabase1_cdp_dev_sql_db_appconfig",
                                "type": "LinkedServiceReference"
                            }
                        }
                    ]
                }
            }
        ],
        "parameters": {
            "p_FeedName": {
                "type": "String",
                "defaultValue": "fa_cpsmyid_vdumcap1"
            },
            "p_BatchType": {
                "type": "String",
                "defaultValue": "RAW"
            },
            "p_RunType": {
                "type": "String",
                "defaultValue": "sch"
            },
            "p_SrcStartDate": {
                "type": "String"
            },
            "p_SrcEndDate": {
                "type": "String"
            },
            "p_TargetDate": {
                "type": "String"
            },
            "p_SrcHour": {
                "type": "String"
            },
            "p_TgtHour": {
                "type": "String"
            }
        },
        "variables": {
            "v_StartDate": {
                "type": "String"
            },
            "v_EndDate": {
                "type": "String"
            }
        },
        "folder": {
            "name": "Batch_load"
        }
    },
    "type": "Microsoft.DataFactory/factories/pipelines"
}
azure ftp azure-data-factory
1个回答
0
投票

根据您的描述,我想您最关心的是如何提高传输性能。

首先,参照 Data integration units 的说明,DIU 只适用于 Azure Integration Runtime,不适用于 Self-hosted Integration Runtime。您的源数据来自FTP,所以我认为即使您已将DIU设置为最大值,它也不会受到影响。(当然,这是官方文档中的说法,您仍然可以向ADF团队求证。)

其次,也许您可以从这篇文档(document)中找到一些提高复制性能的线索。

例如:1. 尝试使用 parallelCopies 属性来指定您希望复制活动使用的并行度。但根据文档说明(statements),它也有一些限制。

2. 尝试将接收器(sink)数据集设置为 Azure SQL Data Warehouse,因为它的性能似乎比 ADL 更好。

enter image description here

3. 尝试在源数据集端压缩文件,以减小文件大小。

4. 考虑使用Azure云服务作为源数据集,例如Azure Blob存储,因为据我所知,Azure服务之间的复制活动性能通常更好。

© www.soinside.com 2019 - 2024. All rights reserved.