Шаг мастеринга (mastering) не завершается, когда выходной формат — XML - PullRequest
0 голосов
/ 20 октября 2019

Я использую DHF 5.0.3 с ML 10.0-1 на OSX.

У меня есть простой поток, который загружает CSV, сопоставляет его с сущностью, а затем выполняет мастеринг. Поток работает нормально, когда все три шага настроены на JSON — я получаю ожидаемые документы сущностей и объединённую сущность. Когда я переключаюсь на вывод в XML, я получаю документы сущностей, но не объединённый документ. Ко всем документам сущностей применяется дополнительная коллекция мастеринга. Это ошибка или я что-то делаю не так?

Поток, CSV и сущность ниже.

Спасибо, Ли

P.S. Было бы замечательно, если бы выходной формат можно было задавать на главной странице шагов маппинга и мастеринга, чтобы мне не приходилось переходить в дополнительные настройки: на шаге загрузки (ingestion) он у вас уже есть, так что с точки зрения пользовательского интерфейса это непоследовательно. Не весь же мир — это JSON.

{
  "info" : {
    "title" : "Customer",
    "version" : "0.0.1",
    "baseUri" : "http://hlf.com/",
    "description" : "Customer"
  },
  "definitions" : {
    "Customer" : {
      "required" : [ ],
      "pii" : [ ],
      "elementRangeIndex" : [ ],
      "rangeIndex" : [ ],
      "wordLexicon" : [ ],
      "properties" : {
        "Month" : {
          "datatype" : "gYearMonth"
        },
        "CustomerID" : {
          "datatype" : "string",
          "collation" : "http://marklogic.com/collation/codepoint"
        },
        "BorrowerType" : {
          "datatype" : "string",
          "collation" : "http://marklogic.com/collation/codepoint"
        },
        "Nationality" : {
          "datatype" : "string",
          "collation" : "http://marklogic.com/collation/codepoint"
        },
        "DateOfBirth" : {
          "datatype" : "date"
        },
        "AgreementNumber" : {
          "datatype" : "string",
          "collation" : "http://marklogic.com/collation/codepoint"
        },
        "RelationToAC" : {
          "datatype" : "string",
          "collation" : "http://marklogic.com/collation/codepoint"
        }
      }
    }
  }
}

{
  "name" : "Customer",
  "description" : "null",
  "batchSize" : 100,
  "threadCount" : 2,
  "stopOnError" : false,
  "options" : { },
  "version" : 0,
  "steps" : {
    "1" : {
      "name" : "IngestCustomer",
      "description" : "",
      "options" : {
        "additionalCollections" : [ "Customer" ],
        "headers" : {
          "sources" : [ {
            "name" : "Customer"
          } ],
          "createdOn" : "currentDateTime",
          "createdBy" : "currentUser"
        },
        "sourceQuery" : "cts.collectionQuery([])",
        "collections" : [ "IngestCustomer", "Customer" ],
        "permissions" : "rest-reader,read,rest-writer,update",
        "outputFormat" : "xml",
        "targetDatabase" : "data-hub-STAGING"
      },
      "customHook" : { },
      "retryLimit" : 0,
      "batchSize" : 0,
      "threadCount" : 0,
      "stepDefinitionName" : "default-ingestion",
      "stepDefinitionType" : "INGESTION",
      "fileLocations" : {
        "inputFilePath" : "/Users/leepollington/ML/HLF/data/test",
        "inputFileType" : "csv",
        "outputURIReplacement" : "/Users/leepollington/ML/HLF/data/test, ''",
        "separator" : "|"
      },
      "mappingStep" : false,
      "customStep" : false,
      "mappingName" : null
    },
    "2" : {
      "name" : "CustomerMapping",
      "description" : "",
      "options" : {
        "additionalCollections" : [ ],
        "sourceQuery" : "cts.collectionQuery([\"IngestCustomer\"])",
        "mapping" : {
          "name" : "Customer-CustomerMapping",
          "version" : 20
        },
        "targetEntity" : "Customer",
        "sourceDatabase" : "data-hub-STAGING",
        "collections" : [ "CustomerMapping", "Customer" ],
        "sourceCollection" : "IngestCustomer",
        "outputFormat" : "xml",
        "targetDatabase" : "data-hub-FINAL"
      },
      "customHook" : { },
      "retryLimit" : null,
      "batchSize" : null,
      "threadCount" : null,
      "stepDefinitionName" : "default-mapping",
      "stepDefinitionType" : "MAPPING",
      "mappingStep" : true,
      "customStep" : false,
      "mappingName" : "Customer-CustomerMapping"
    },
    "3" : {
      "name" : "CustomerMastering",
      "description" : "",
      "options" : {
        "stepUpdate" : true,
        "additionalCollections" : [ "xml" ],
        "sourceQuery" : "cts.collectionQuery([\"CustomerMapping\"])",
        "acceptsBatch" : true,
        "targetEntity" : "Customer",
        "sourceDatabase" : "data-hub-FINAL",
        "collections" : [ "CustomerMastering", "Customer", "xml" ],
        "mergeOptions" : {
          "matchOptions" : "",
          "propertyDefs" : {
            "properties" : [ {
              "localname" : "Month",
              "name" : "Month"
            }, {
              "localname" : "CustomerID",
              "name" : "CustomerID"
            }, {
              "localname" : "BorrowerType",
              "name" : "BorrowerType"
            }, {
              "localname" : "Nationality",
              "name" : "Nationality"
            }, {
              "localname" : "DateOfBirth",
              "name" : "DateOfBirth"
            } ],
            "namespaces" : { }
          },
          "algorithms" : {
            "stdAlgorithm" : {
              "timestamp" : { }
            },
            "custom" : [ ],
            "collections" : {
              "onMerge" : {
                "add" : {
                  "collection" : [ "MERGED" ]
                },
                "remove" : {
                  "collection" : [ "" ]
                },
                "set" : {
                  "collection" : [ "" ]
                }
              }
            }
          },
          "mergeStrategies" : [ ],
          "merging" : [ {
            "propertyName" : "Month",
            "maxValues" : "3",
            "sourceWeights" : [ ]
          }, {
            "propertyName" : "CustomerID",
            "maxValues" : "1",
            "sourceWeights" : [ ]
          }, {
            "propertyName" : "BorrowerType",
            "maxValues" : "1",
            "sourceWeights" : [ ]
          }, {
            "propertyName" : "Nationality",
            "maxValues" : "1",
            "sourceWeights" : [ ]
          }, {
            "propertyName" : "DateOfBirth",
            "maxValues" : "1",
            "sourceWeights" : [ ]
          } ]
        },
        "matchOptions" : {
          "dataFormat" : "json",
          "propertyDefs" : {
            "property" : [ {
              "localname" : "CustomerID",
              "name" : "CustomerID"
            } ]
          },
          "algorithms" : {
            "algorithm" : [ {
              "name" : "double-metaphone",
              "function" : "double-metaphone",
              "namespace" : "http://marklogic.com/smart-mastering/algorithms",
              "at" : "/com.marklogic.smart-mastering/algorithms/double-metaphone.xqy"
            }, {
              "name" : "thesaurus",
              "function" : "thesaurus",
              "namespace" : "http://marklogic.com/smart-mastering/algorithms",
              "at" : "/com.marklogic.smart-mastering/algorithms/thesaurus.xqy"
            }, {
              "name" : "zip-match",
              "function" : "zip-match",
              "namespace" : "http://marklogic.com/smart-mastering/algorithms",
              "at" : "/com.marklogic.smart-mastering/algorithms/zip.xqy"
            }, {
              "name" : "standard-reduction",
              "function" : "standard-reduction"
            } ]
          },
          "collections" : {
            "content" : [ ]
          },
          "scoring" : {
            "add" : [ {
              "propertyName" : "CustomerID",
              "weight" : "50"
            } ],
            "expand" : [ ],
            "reduce" : [ ]
          },
          "actions" : {
            "action" : [ ]
          },
          "thresholds" : {
            "threshold" : [ {
              "above" : "10",
              "label" : "CMT",
              "action" : "merge"
            } ]
          },
          "tuning" : {
            "maxScan" : 200
          }
        },
        "sourceCollection" : "CustomerMapping",
        "outputFormat" : "xml",
        "targetDatabase" : "data-hub-FINAL"
      },
      "customHook" : { },
      "retryLimit" : 0,
      "batchSize" : 0,
      "threadCount" : 0,
      "stepDefinitionName" : "default-mastering",
      "stepDefinitionType" : "MASTERING",
      "mappingStep" : false,
      "customStep" : false,
      "mappingName" : null
    }
  }
}

Month|CustomerID|BorrowerType|Nationality|DateOfBirth
2019-04|Sxxxx00619B|000|USA|1956-02-06
2019-05|Sxxxx00619B|000|USA|1956-02-06
2019-06|Sxxxx00619B|000|USA|1956-02-06
...