Я использую DHF 5.0.3 с ML 10.0-1 на OSX.
У меня есть простой поток, который загружает CSV, сопоставляет данные с сущностью, а затем выполняет мастеринг. Поток работает нормально, когда все три шага настроены на JSON: я получаю ожидаемые документы сущностей и объединённый документ сущности. Когда я переключаю вывод на XML, я получаю документы сущностей, но не объединённый документ. При этом ко всем документам сущностей применяется дополнительная коллекция мастеринга. Это ошибка, или я что-то делаю не так?
Поток, CSV и сущность ниже.
Спасибо, Ли
P.S. Было бы замечательно, если бы вы вынесли выбор выходного формата на главную страницу шагов сопоставления и мастеринга, как это сделано на шаге загрузки, чтобы мне не приходилось заходить в дополнительные настройки; сейчас это выглядит непоследовательно с точки зрения пользовательского интерфейса. В конце концов, не весь мир — это JSON.
{
"info" : {
"title" : "Customer",
"version" : "0.0.1",
"baseUri" : "http://hlf.com/",
"description" : "Customer"
},
"definitions" : {
"Customer" : {
"required" : [ ],
"pii" : [ ],
"elementRangeIndex" : [ ],
"rangeIndex" : [ ],
"wordLexicon" : [ ],
"properties" : {
"Month" : {
"datatype" : "gYearMonth"
},
"CustomerID" : {
"datatype" : "string",
"collation" : "http://marklogic.com/collation/codepoint"
},
"BorrowerType" : {
"datatype" : "string",
"collation" : "http://marklogic.com/collation/codepoint"
},
"Nationality" : {
"datatype" : "string",
"collation" : "http://marklogic.com/collation/codepoint"
},
"DateOfBirth" : {
"datatype" : "date"
},
"AgreementNumber" : {
"datatype" : "string",
"collation" : "http://marklogic.com/collation/codepoint"
},
"RelationToAC" : {
"datatype" : "string",
"collation" : "http://marklogic.com/collation/codepoint"
}
}
}
}
}
{
"name" : "Customer",
"description" : "null",
"batchSize" : 100,
"threadCount" : 2,
"stopOnError" : false,
"options" : { },
"version" : 0,
"steps" : {
"1" : {
"name" : "IngestCustomer",
"description" : "",
"options" : {
"additionalCollections" : [ "Customer" ],
"headers" : {
"sources" : [ {
"name" : "Customer"
} ],
"createdOn" : "currentDateTime",
"createdBy" : "currentUser"
},
"sourceQuery" : "cts.collectionQuery([])",
"collections" : [ "IngestCustomer", "Customer" ],
"permissions" : "rest-reader,read,rest-writer,update",
"outputFormat" : "xml",
"targetDatabase" : "data-hub-STAGING"
},
"customHook" : { },
"retryLimit" : 0,
"batchSize" : 0,
"threadCount" : 0,
"stepDefinitionName" : "default-ingestion",
"stepDefinitionType" : "INGESTION",
"fileLocations" : {
"inputFilePath" : "/Users/leepollington/ML/HLF/data/test",
"inputFileType" : "csv",
"outputURIReplacement" : "/Users/leepollington/ML/HLF/data/test, ''",
"separator" : "|"
},
"mappingStep" : false,
"customStep" : false,
"mappingName" : null
},
"2" : {
"name" : "CustomerMapping",
"description" : "",
"options" : {
"additionalCollections" : [ ],
"sourceQuery" : "cts.collectionQuery([\"IngestCustomer\"])",
"mapping" : {
"name" : "Customer-CustomerMapping",
"version" : 20
},
"targetEntity" : "Customer",
"sourceDatabase" : "data-hub-STAGING",
"collections" : [ "CustomerMapping", "Customer" ],
"sourceCollection" : "IngestCustomer",
"outputFormat" : "xml",
"targetDatabase" : "data-hub-FINAL"
},
"customHook" : { },
"retryLimit" : null,
"batchSize" : null,
"threadCount" : null,
"stepDefinitionName" : "default-mapping",
"stepDefinitionType" : "MAPPING",
"mappingStep" : true,
"customStep" : false,
"mappingName" : "Customer-CustomerMapping"
},
"3" : {
"name" : "CustomerMastering",
"description" : "",
"options" : {
"stepUpdate" : true,
"additionalCollections" : [ "xml" ],
"sourceQuery" : "cts.collectionQuery([\"CustomerMapping\"])",
"acceptsBatch" : true,
"targetEntity" : "Customer",
"sourceDatabase" : "data-hub-FINAL",
"collections" : [ "CustomerMastering", "Customer", "xml" ],
"mergeOptions" : {
"matchOptions" : "",
"propertyDefs" : {
"properties" : [ {
"localname" : "Month",
"name" : "Month"
}, {
"localname" : "CustomerID",
"name" : "CustomerID"
}, {
"localname" : "BorrowerType",
"name" : "BorrowerType"
}, {
"localname" : "Nationality",
"name" : "Nationality"
}, {
"localname" : "DateOfBirth",
"name" : "DateOfBirth"
} ],
"namespaces" : { }
},
"algorithms" : {
"stdAlgorithm" : {
"timestamp" : { }
},
"custom" : [ ],
"collections" : {
"onMerge" : {
"add" : {
"collection" : [ "MERGED" ]
},
"remove" : {
"collection" : [ "" ]
},
"set" : {
"collection" : [ "" ]
}
}
}
},
"mergeStrategies" : [ ],
"merging" : [ {
"propertyName" : "Month",
"maxValues" : "3",
"sourceWeights" : [ ]
}, {
"propertyName" : "CustomerID",
"maxValues" : "1",
"sourceWeights" : [ ]
}, {
"propertyName" : "BorrowerType",
"maxValues" : "1",
"sourceWeights" : [ ]
}, {
"propertyName" : "Nationality",
"maxValues" : "1",
"sourceWeights" : [ ]
}, {
"propertyName" : "DateOfBirth",
"maxValues" : "1",
"sourceWeights" : [ ]
} ]
},
"matchOptions" : {
"dataFormat" : "json",
"propertyDefs" : {
"property" : [ {
"localname" : "CustomerID",
"name" : "CustomerID"
} ]
},
"algorithms" : {
"algorithm" : [ {
"name" : "double-metaphone",
"function" : "double-metaphone",
"namespace" : "http://marklogic.com/smart-mastering/algorithms",
"at" : "/com.marklogic.smart-mastering/algorithms/double-metaphone.xqy"
}, {
"name" : "thesaurus",
"function" : "thesaurus",
"namespace" : "http://marklogic.com/smart-mastering/algorithms",
"at" : "/com.marklogic.smart-mastering/algorithms/thesaurus.xqy"
}, {
"name" : "zip-match",
"function" : "zip-match",
"namespace" : "http://marklogic.com/smart-mastering/algorithms",
"at" : "/com.marklogic.smart-mastering/algorithms/zip.xqy"
}, {
"name" : "standard-reduction",
"function" : "standard-reduction"
} ]
},
"collections" : {
"content" : [ ]
},
"scoring" : {
"add" : [ {
"propertyName" : "CustomerID",
"weight" : "50"
} ],
"expand" : [ ],
"reduce" : [ ]
},
"actions" : {
"action" : [ ]
},
"thresholds" : {
"threshold" : [ {
"above" : "10",
"label" : "CMT",
"action" : "merge"
} ]
},
"tuning" : {
"maxScan" : 200
}
},
"sourceCollection" : "CustomerMapping",
"outputFormat" : "xml",
"targetDatabase" : "data-hub-FINAL"
},
"customHook" : { },
"retryLimit" : 0,
"batchSize" : 0,
"threadCount" : 0,
"stepDefinitionName" : "default-mastering",
"stepDefinitionType" : "MASTERING",
"mappingStep" : false,
"customStep" : false,
"mappingName" : null
}
}
}
Month|CustomerID|BorrowerType|Nationality|DateOfBirth
2019-04|Sxxxx00619B|000|USA|1956-02-06
2019-05|Sxxxx00619B|000|USA|1956-02-06
2019-06|Sxxxx00619B|000|USA|1956-02-06