Синтаксическая ошибка: org. apache .had oop .hive.ql.parse.ParseException: строка 5: 0 посторонних входных данных '{' ожидает StringLiteral рядом с '"типом"' в свойствах таблицы - PullRequest
1 голос
/ 16 июня 2020

Я пробовал запустить команду создания таблицы запроса куста.

Вот содержание кода:

CREATE EXTERNAL TABLE avro_test_1
ROW FORMAT
SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
WITH SERDEPROPERTIES ('avro.schema.literal'='
{
  "type" : "record",
  "name" : "pixel_record",
  "namespace" : "com.mediamath.log_streaming",
  "doc" : "represents a record published into kafka for each pixel event",
  "fields" : [ {
    "name" : "event_id",
    "type" : "string",
    "doc" : "unique ID for each pixel event",
    "default" : ""
  }, {
    "name" : "system_timestamps",
    "type" : [ "null", {
      "type" : "array",
      "items" : {
        "type" : "record",
        "name" : "timestamps_record",
        "doc" : "Read and publish timestamp of a system that processed this event",
        "fields" : [ {
          "name" : "publisher",
          "type" : "string",
          "doc" : "name of the system publishing this event",
          "default" : ""
        }, {
          "name" : "source_time",
          "type" : "long",
          "doc" : "unix time in nanos denoting moment when the message was received by the system",
          "default" : 0
        }, {
          "name" : "publish_time",
          "type" : "long",
          "doc" : "unix time in nanos denoting the moment when the message was published by the system",
          "default" : 0
        } ]
      }
    } ],
    "doc" : "array of 3-tuples containing message receive time, system name, and system publish time",
    "default" : null
  }, {
    "name" : "errors",
    "type" : [ "null", {
      "type" : "array",
      "items" : {
        "type" : "record",
        "name" : "Error",
        "doc" : "Used to indicate if an error occurred while processing this event. Provides more context about the nature of error",
        "fields" : [ {
          "name" : "publisher",
          "type" : "string",
          "doc" : "name of the system that encountered an error while processing this event",
          "default" : ""
        }, {
          "name" : "error_message",
          "type" : "string",
          "doc" : "brief error message",
          "default" : ""
        }, {
          "name" : "error_code",
          "type" : "int",
          "doc" : "pre-defined error codes for specific error types",
          "default" : 0
        } ]
      }
    } ],
    "doc" : "array of errors that occured while processing this event",
    "default" : null
  }, {
    "name" : "event_data",
    "type" : {
      "type" : "record",
      "name" : "pixel_event_data",
      "doc" : "represents a pixel event that was processed by MathTag. Refer to https://wiki.mediamath.com/wiki/pages/viewpage.action?pageId=79659035",
      "fields" : [ {
        "name" : "mt_timestamp",
        "type" : "string",
        "doc" : "timestamp when this pixel event occurred",
        "default" : ""
      }, {
        "name" : "ip_address",
        "type" : "string",
        "doc" : "IP address of browser",
        "default" : ""
      }, {
        "name" : "domain_name",
        "type" : "string",
        "doc" : "MM domain name like pixel.mathtag.com",
        "default" : ""
      }, {
        "name" : "mm_uuid",
        "type" : "string",
        "doc" : "MediaMath UUID",
        "default" : ""
      }, {
        "name" : "path_query",
        "type" : "string",
        "default" : ""
      }, {
        "name" : "user_agent",
        "type" : "string",
        "default" : ""
      }, {
        "name" : "referrer",
        "type" : "string",
        "default" : ""
      }, {
        "name" : "record_length",
        "type" : "int",
        "doc" : "string length of entire event string",
        "default" : 0
      }, {
        "name" : "filename",
        "type" : "string",
        "doc" : "log file name in which this event was recorded in pixel server",
        "default" : ""
      } ]
    },
    "doc" : "Mathtag event data"
  }, {
    "name" : "cid_data",
    "type" : [ "null", {
      "type" : "record",
      "name" : "cid_data",
      "doc" : "represents Connected ID information related to the MM UUID in the original event record",
      "fields" : [ {
        "name" : "one_of_uuid_or_connected_id",
        "type" : "string",
        "doc" : "One of: Connected ID, as reported by IDM; or the original UUID. Refer to next field to determine which."
      }, {
        "name" : "is_the_connected_id_real",
        "type" : "boolean",
        "doc" : "This flag will be set to 'true' if and only if the field 'one_of_uuid_or_connected_id' in this record contains an actual Connected ID value that was reported by IDM."
      }, {
        "name" : "uuid_cid_connection_type",
        "type" : "int",
        "doc" : "This flag represents the relationship between the MM_UUID (primary device ID in bid request) and the ConnectedID that is retrieved from Cross-Device ID Graph (IDM). 1 - Deterministic; 0 - Probabilistic; -1 - Unknown.",
        "default" : -1
      } ]
    } ],
    "doc" : "Connected ID enrichment",
    "default" : null
  }, {
    "name" : "geo_data",
    "type" : [ "null", {
      "type" : "record",
      "name" : "geo_data",
      "namespace" : "com.mediamath.geo",
      "doc" : "represents a record in geo meta data obtained from Digital Element",
      "fields" : [ {
        "name" : "ip_version",
        "type" : "int",
        "doc" : "version of Digital Element DB used to get geo metadata",
        "default" : 0
      }, {
        "name" : "start_ip",
        "type" : "long",
        "doc" : "start IP address of the geo IP range to which this pixel events IP address belongs",
        "default" : 0
      }, {
        "name" : "end_ip",
        "type" : "long",
        "doc" : "end IP address of the geo IP range to which this pixel events IP address belongs",
        "default" : 0
      }, {
        "name" : "country",
        "type" : "string",
        "doc" : "name of the country in which the user fired the pixel",
        "default" : ""
      }, {
        "name" : "country_code_iso",
        "type" : "string",
        "doc" : "an internationally recognized two-letter code (alpha-2) used to refer to countries and subdivisions",
        "default" : ""
      }, {
        "name" : "country_id",
        "type" : "int",
        "doc" : "unique MM ID tied to the Digital Element country field. See Country for description",
        "default" : 0
      }, {
        "name" : "region",
        "type" : "string",
        "doc" : "name of region in which the user fired the pixel",
        "default" : ""
      }, {
        "name" : "region_id",
        "type" : "int",
        "doc" : "unique MM ID tied to the Digital Element region field. See Region for description",
        "default" : 0
      }, {
        "name" : "dma",
        "type" : "string",
        "doc" : "name of designated market area in which the user fired the pixel",
        "default" : ""
      }, {
        "name" : "dma_id",
        "type" : "int",
        "doc" : "unique MM ID tied to the Digital Element dma field. See Dma for description",
        "default" : 0
      }, {
        "name" : "conn_speed",
        "type" : "string",
        "doc" : "reflects the speed of a user’s connection. For example, whether the user is connecting via dialup or a cable modem. This field can also include connection types such as satellite or mobile",
        "default" : ""
      }, {
        "name" : "conn_speed_id",
        "type" : "int",
        "doc" : "unique MM ID tied to the Digital Element connection speed field. See ConnSpeed for description",
        "default" : 0
      }, {
        "name" : "isp",
        "type" : "string",
        "doc" : "Internet service provider associated with the user who fired the pixel. Examples: AT&T, Verizon, Charter etc",
        "default" : ""
      }, {
        "name" : "isp_id",
        "type" : "int",
        "doc" : "unique MM ID tied to the Digital Element isp field. See Isp for description.",
        "default" : 0
      }, {
        "name" : "zipcode",
        "type" : "string",
        "doc" : "zipcode/postal code in which the user fired the pixel",
        "default" : ""
      }, {
        "name" : "zipcode_id",
        "type" : "int",
        "doc" : "unique MM ID tied to the Digital Element zipcode field. See ZipCode for description.",
        "default" : 0
      }, {
        "name" : "GMT_offset",
        "type" : "int",
        "doc" : "the current offset from Greenwich Mean Time (GMT/UTC) accounting for Daylight Savings Time, if appropriate",
        "default" : 0
      }, {
        "name" : "in_dst",
        "type" : "string",
        "doc" : "if country is in daylight savings time or not. Y for yes or N for no",
        "default" : ""
      }, {
        "name" : "homebiz_type_id",
        "type" : "int",
        "doc" : "connection type of the user who fired the pixel, either 1 for ‘business’ or 2 for ‘home’",
        "default" : -1
      }, {
        "name" : "homebiz_type",
        "type" : "string",
        "doc" : "connection type of the user who fired the pixel, ‘business’ or ‘home’",
        "default" : "?"
      }, {
        "name" : "region_code",
        "type" : "int",
        "doc" : "a numerical id provided by digital element that corresponds to the region name",
        "default" : 0
      }, {
        "name" : "city",
        "type" : "string",
        "doc" : "concatination of country abbr + region abbr + city name",
        "default" : ""
      }, {
        "name" : "city_code",
        "type" : "int",
        "doc" : "a numerical id provided by digital element that corresponds to the city name",
        "default" : 0
      }, {
        "name" : "city_code_id",
        "type" : "int",
        "doc" : "unique MM ID tied to the Digital Element city code field. See CityCode for description",
        "default" : 0
      } ]
    } ],
    "doc" : "Geo data enrichment",
    "default" : null
  }, {
    "name" : "misc_data",
    "type" : [ "null", {
      "type" : "record",
      "name" : "misc_data",
      "doc" : "11th field of MathTag log line",
      "fields" : [ {
        "name" : "last",
        "type" : [ "null", {
          "type" : "record",
          "name" : "last",
          "fields" : [ {
            "name" : "count",
            "type" : "long",
            "doc" : "total number of records"
          } ]
        } ],
        "doc" : "indicates last record in MathTag log file",
        "default" : null
      } ]
    } ],
    "doc" : "Misc data enrichment",
    "default" : null
  }, {
    "name" : "standard_segments",
    "type" : [ "null", {
      "type" : "record",
      "name" : "standard_segments",
      "doc" : "metadata record providing insights into Standard Segments enrichment",
      "fields" : [ {
        "name" : "enriched",
        "type" : "boolean",
        "doc" : "indicates that this is a parent record from which additional SS records were generated"
      } ]
    } ],
    "doc" : "Standard Segments",
    "default" : null
  } ]
}
')
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION 's3://raw-logs-data-prod-superior-mm/pixels/avro/'
;

Я проверяю этот формат JSON, и это допустимый формат. И я получаю следующую ошибку:

Синтаксическая ошибка: org. apache .had oop .hive.ql.parse.ParseException: строка 5: 0 посторонних входных данных '{' ожидает StringLiteral рядом с "type" в свойствах таблицы

...