файл .dat в таблицу кустов с данными в двойных кавычках - PullRequest
0 голосов
/ 06 ноября 2019

У меня есть файл .dat, как показано ниже, для загрузки в папку hdfs, а затем в таблицу. Это сгенерировано программным обеспечением

"FACT" "Jan" "2011YR" "7913" "25A04PLTF" "PM IAP None" "R_LOC" "Custom2.FM None" "Custom1.PM None" "Custom4.FM None" "25BU" "25A04PLTF_OC0001" -196.790000000000
"FACT" "Jan" "2011YR" "9370" "25A01MKT" "PM IAP None" "R_INR" "Custom2.FM None" "Custom1.PM None" "Custom4.FM None" "39BU" "25A01MKT_NoSubDiv" 778.210000000000
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| term | month  | year  | ledger_group  | condition1  | condition2  | condition3  | condition4     | condition5     |  condition6    | condition7  | condition8       | Value               |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|FACT  | Jan    |2011YR |2025           |25A01PLTF    |PM IAP None  |R_INR        |Custom2.FM None |Custom1.PM None |Custom4.FM None |22BU         |25A06PLTF_B603    |561.400000000000     |
|FACT  | Jan    |2011YR |7913           |25A04PLTF    |PM IAP None  |R_LOC        |Custom2.FM None |Custom1.PM None |Custom4.FM None |25BU         |25A04PLTF_OC0001  |-196.790000000000    |
|FACT  | Jan    |2011YR |9370           |25A01MKT     |PM IAP None  |R_INR        |Custom2.FM None |Custom1.PM None |Custom4.FM None |39BU         |25A01MKT_NoSubDiv |778.210000000000     |

Я использовал следующий оператор создания ниже.

create table db.hpm_poc( term string,
month string,
year varchar(32),
ledger_group varchar(32),
condition1 varchar(32),
condition2 varchar(32),
condition3 varchar(32),
condition4 varchar(32),
condition5 varchar(32),
condition6 varchar(32),
condition7 varchar(32),
condition8 varchar(32),
value float)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
   "separatorChar" = "\t",
   "quoteChar"     = "\"")
LOCATION 'xyz';

, но получаю следующий результат

+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------+-------+---------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+------------+--------+--+
|                                                                                       term                                                                                       | month  | year  | ledger_group  | condition1  | condition2  | condition3  | condition4  | condition5  | condition6  | condition7  | Condition8 | value  |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------+-------+---------------+-------------+-------------+-------------+-------------+-------------+-------------+-------------+------------+--------+--+
| FACT" "Jan" "2011YR" "2025" "25A01PLTF" "PM IAP None" "R_INR" "Custom2.FM None" "Custom1.PM None" "Custom4.FM None" "22BU" "25A06PLTF_B603" 561.400000000000                   | NULL   | NULL  | NULL          | NULL        | NULL        | NULL        | NULL        | NULL        | NULL        | NULL        | NULL       | NULL   |
| FACT" "Jan" "2011YR" "7913" "25A04PLTF" "PM IAP None" "R_LOC" "Custom2.FM None" "Custom1.PM None" "Custom4.FM None" "25BU" "25A04PLTF_OC0001" -196.790000000000                | NULL   | NULL  | NULL          | NULL        | NULL        | NULL        | NULL        | NULL        | NULL        | NULL        | NULL       | NULL   |
| FACT" "Jan" "2011YR" "9370" "25A01MKT" "PM IAP None" "R_INR" "Custom2.FM None" "Custom1.PM None" "Custom4.FM None" "39BU" "25A01MKT_NoSubDiv" 778.210000000000                 | NULL   | NULL  | NULL          | NULL        | NULL        | NULL        | NULL        | NULL        | NULL        | NULL        | NULL       | NULL   |

и вSHOW CREATE TABLE типы данных ddl различны

 CREATE TABLE `db.hpm_poc`(                                                    
   `term` string COMMENT 'from deserializer',                                                             
   `month` string COMMENT 'from deserializer',                                                            
   `year` string COMMENT 'from deserializer',                                                             
   `ledger_group` string COMMENT 'from deserializer',                                                     
   `condition1` string COMMENT 'from deserializer',                                                       
   `condition2` string COMMENT 'from deserializer',                                                       
   `condition3` string COMMENT 'from deserializer',                                                       
   `condition4` string COMMENT 'from deserializer',                                                       
   `condition5` string COMMENT 'from deserializer',                                                       
   `condition6` string COMMENT 'from deserializer',                                                       
   `condition7` string COMMENT 'from deserializer',
   `condition8` string COMMENT 'from deserializer',   
   `value` string COMMENT 'from deserializer')                                                            
 ROW FORMAT SERDE                                                                                         
   'org.apache.hadoop.hive.serde2.OpenCSVSerde'                                                           
 WITH SERDEPROPERTIES (                                                                                   
   'quoteChar'='\"',                                                                                      
   'separatorChar'='\t')                                                                                  
 STORED AS INPUTFORMAT                                                                                    
   'org.apache.hadoop.mapred.TextInputFormat'                                                             
 OUTPUTFORMAT                                                                                             
   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'                                           
 LOCATION                                                                                                 
   'xyz'  
 TBLPROPERTIES (                                                                                          
   'numFiles'='0',                                                                                        
   'totalSize'='0',                                                                                       
   'transient_lastDdlTime'='0');`                                                                

Может кто-нибудь помочь мне с этим?

...