Я пытаюсь скопировать файл размером 14 ГБ с FTP-сервера в хранилище Azure Data Lake Store с помощью фабрики данных Azure (Azure Data Factory). Когда я запустил конвейер, он начал копировать файл и примерно за полчаса скопировал почти 13,9 ГБ.
Оставшиеся данные так и не были скопированы даже после 8 часов работы конвейера, и в итоге он завершился ошибкой с сообщением, что файл недоступен. Файл оказался недоступен потому, что команда, отвечающая за источник, удалила его, чтобы выложить следующий файл.
Число единиц интеграции данных (Data Integration Units) было увеличено до 250.
{
"name": "job_fa",
"properties": {
"activities": [
{
"name": "set_parameters_adh_or_sch",
"description": "validate and set the parameter values based on the runtype sch or adh",
"type": "Lookup",
"dependsOn": [
{
"activity": "br_bs_loggin",
"dependencyConditions": [
"Succeeded"
]
}
],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [
{
"name": "CheckLookup1",
"value": "1"
}
],
"typeProperties": {
"source": {
"type": "SqlSource",
"sqlReaderStoredProcedureName": "[dbo].[usp_FeedParameters_main]",
"storedProcedureParameters": {
"FeedName_in": {
"type": "String",
"value": {
"value": "@pipeline().parameters.p_FeedName",
"type": "Expression"
}
},
"RunType_in": {
"type": "String",
"value": {
"value": "@pipeline().parameters.p_RunType",
"type": "Expression"
}
},
"SrcEnddate_in": {
"type": "String",
"value": {
"value": "@pipeline().parameters.p_SrcEndDate",
"type": "Expression"
}
},
"SrcStartdate_in": {
"type": "String",
"value": {
"value": "@pipeline().parameters.p_SrcStartDate",
"type": "Expression"
}
},
"TgtDate_in": {
"type": "String",
"value": {
"value": "@pipeline().parameters.p_TargetDate",
"type": "Expression"
}
},
"SrcHour_in": {
"type": "String",
"value": {
"value": "@pipeline().parameters.p_SrcHour",
"type": "Expression"
}
},
"TgtHour_in": {
"type": "String",
"value": {
"value": "@pipeline().parameters.p_TgtHour",
"type": "Expression"
}
}
}
},
"dataset": {
"referenceName": "AzureSql_cdpconfiguser",
"type": "DatasetReference"
},
"firstRowOnly": true
}
},
{
"name": "br_bs_loggin",
"description": "insert into the batch run and update the batch scheduler to started in case of sch run",
"type": "Lookup",
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"typeProperties": {
"source": {
"type": "SqlSource",
"sqlReaderStoredProcedureName": "[dbo].[usp_BatchRun]",
"storedProcedureParameters": {
"FeedName_in": {
"type": "String",
"value": {
"value": "@pipeline().parameters.p_FeedName",
"type": "Expression"
}
},
"RunType_in": {
"type": "String",
"value": {
"value": "@pipeline().parameters.p_RunType",
"type": "Expression"
}
},
"Status_in": {
"type": "String",
"value": "Started"
}
}
},
"dataset": {
"referenceName": "AzureSql_cdpconfiguser",
"type": "DatasetReference"
},
"firstRowOnly": true
}
},
{
  "name": "Check if file exists in target",
  "type": "GetMetadata",
  "dependsOn": [
    {
      "activity": "Copy Data WT to ADLS",
      "dependencyConditions": [
        "Succeeded"
      ]
    }
  ],
  "policy": {
    "timeout": "7.00:00:00",
    "retry": 0,
    "retryIntervalInSeconds": 30,
    "secureOutput": false,
    "secureInput": false
  },
  "typeProperties": {
    "dataset": {
      "referenceName": "AzureDataLakeStoreFile_wt_tgt_path_and_name",
      "type": "DatasetReference",
      "parameters": {
        "TgtFilePath": "@activity('set_parameters_adh_or_sch').output.firstrow.TgtFilePath_wt_dt_out",
        "TgtFileName": {
          "value": "@activity('set_parameters_adh_or_sch').output.firstrow.TgtFileName_wt_dt_out",
          "type": "Expression"
        }
      }
    },
    "fieldList": [
      "exists",
      "size"
    ]
  }
},
{
  "name": "Copy Data WT to ADLS",
  "type": "Copy",
  "dependsOn": [
    {
      "activity": "set_parameters_adh_or_sch",
      "dependencyConditions": [
        "Succeeded"
      ]
    }
  ],
  "policy": {
    "timeout": "7.00:00:00",
    "retry": 0,
    "retryIntervalInSeconds": 30,
    "secureOutput": false,
    "secureInput": false
  },
  "userProperties": [
    {
      "name": "Source",
      "value": "@{activity('set_parameters_adh_or_sch').output.firstrow.SrcFilePath_wo_dt_out}/@{activity('set_parameters_adh_or_sch').output.firstrow.SrcFileName_wt_dt_out}"
    },
    {
      "name": "Destination",
      "value": "@{activity('set_parameters_adh_or_sch').output.firstrow.TgtFilePath_wt_dt_out}/@{activity('set_parameters_adh_or_sch').output.firstrow.TgtFileName_wt_dt_out}"
    }
  ],
  "typeProperties": {
    "source": {
      "type": "FileSystemSource",
      "recursive": true
    },
    "sink": {
      "type": "AzureDataLakeStoreSink"
    },
    "enableStaging": false,
    "dataIntegrationUnits": 0
  },
  "inputs": [
    {
      "referenceName": "FTP_SRC_FA",
      "type": "DatasetReference",
      "parameters": {
        "SrcFileName": "@activity('set_parameters_adh_or_sch').output.firstrow.SrcFileName_wt_dt_out",
        "SrcFilePath": "@activity('set_parameters_adh_or_sch').output.firstrow.SrcFilePath_wo_dt_out"
      }
    }
  ],
  "outputs": [
    {
      "referenceName": "AzureDataLakeStoreFile_wt_tgt_path_and_name",
      "type": "DatasetReference",
      "parameters": {
        "TgtFilePath": "@activity('set_parameters_adh_or_sch').output.firstrow.TgtFilePath_wt_dt_out",
        "TgtFileName": {
          "value": "@activity('set_parameters_adh_or_sch').output.firstrow.TgtFileName_wt_dt_out",
          "type": "Expression"
        }
      }
    }
  ]
},
{
"name": "br_bs_update_failed",
"type": "SqlServerStoredProcedure",
"dependsOn": [
{
"activity": "Copy Data WT to ADLS",
"dependencyConditions": [
"Failed"
]
}
],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"typeProperties": {
"storedProcedureName": "[dbo].[usp_BatchRunUpdate]",
"storedProcedureParameters": {
"BatchId": {
"value": {
"value": "@activity('br_bs_loggin').output.firstrow.Batchid_out",
"type": "Expression"
},
"type": "String"
},
"FeedID": {
"value": {
"value": "@activity('br_bs_loggin').output.firstrow.FeedId_out",
"type": "Expression"
},
"type": "Int32"
},
"FeedRunId": {
"value": {
"value": "@activity('br_bs_loggin').output.firstrow.BatchRunId_out",
"type": "Expression"
},
"type": "Int32"
},
"Status": {
"value": "Failed",
"type": "String"
}
}
},
"linkedServiceName": {
"referenceName": "AzureSqlDatabase1_cdp_dev_sql_db_appconfig",
"type": "LinkedServiceReference"
}
},
{
"name": "If Condition1",
"type": "IfCondition",
"dependsOn": [
{
"activity": "Check if file exists in target",
"dependencyConditions": [
"Succeeded"
]
}
],
"typeProperties": {
"expression": {
"value": "@equals(activity('Check if file exists in target').output.Exists,true)",
"type": "Expression"
},
"ifFalseActivities": [
{
"name": "Stored Procedure_failed",
"type": "SqlServerStoredProcedure",
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"typeProperties": {
"storedProcedureName": "[dbo].[usp_BatchRunUpdate]",
"storedProcedureParameters": {
"BatchId": {
"value": {
"value": "@activity('br_bs_loggin').output.firstrow.Batchid_out",
"type": "Expression"
},
"type": "String"
},
"FeedID": {
"value": {
"value": "@activity('br_bs_loggin').output.firstrow.FeedId_out",
"type": "Expression"
},
"type": "Int32"
},
"FeedRunId": {
"value": {
"value": "@activity('br_bs_loggin').output.firstrow.BatchRunId_out",
"type": "Expression"
},
"type": "Int32"
},
"Status": {
"value": "Failed",
"type": "String"
}
}
},
"linkedServiceName": {
"referenceName": "AzureSqlDatabase1_cdp_dev_sql_db_appconfig",
"type": "LinkedServiceReference"
}
}
],
"ifTrueActivities": [
{
"name": "Stored Procedure1",
"type": "SqlServerStoredProcedure",
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"typeProperties": {
"storedProcedureName": "[dbo].[usp_BatchRunUpdate]",
"storedProcedureParameters": {
"BatchId": {
"value": {
"value": "@activity('br_bs_loggin').output.firstrow.Batchid_out",
"type": "Expression"
},
"type": "String"
},
"FeedID": {
"value": {
"value": "@activity('br_bs_loggin').output.firstrow.FeedId_out",
"type": "Expression"
},
"type": "Int32"
},
"FeedRunId": {
"value": {
"value": "@activity('br_bs_loggin').output.firstrow.BatchRunId_out",
"type": "Expression"
},
"type": "Int32"
},
"Status": {
"value": "Succeeded",
"type": "String"
}
}
},
"linkedServiceName": {
"referenceName": "AzureSqlDatabase1_cdp_dev_sql_db_appconfig",
"type": "LinkedServiceReference"
}
}
]
}
}
],
"parameters": {
"p_FeedName": {
"type": "String",
"defaultValue": "fa_cpsmyid_vdumcap1"
},
"p_BatchType": {
"type": "String",
"defaultValue": "RAW"
},
"p_RunType": {
"type": "String",
"defaultValue": "sch"
},
"p_SrcStartDate": {
"type": "String"
},
"p_SrcEndDate": {
"type": "String"
},
"p_TargetDate": {
"type": "String"
},
"p_SrcHour": {
"type": "String"
},
"p_TgtHour": {
"type": "String"
}
},
"variables": {
"v_StartDate": {
"type": "String"
},
"v_EndDate": {
"type": "String"
}
},
"folder": {
"name": "Batch_load"
}
},
"type": "Microsoft.DataFactory/factories/pipelines"
}