Проверьте, работает ли это для вас.
> cat hive_table.txt2
show create table hive_table:
create table hive_table(id number,age number)
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
LOCATION 'hdfs:/path/'
TBLPROPERTIES ( 'spark.sql.sources ....)
show create table hive_table2:
create table hive_table2(id number,age number)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION 'hdfs:/path/'
TBLPROPERTIES ( 'spark.sql.sources ....)
show create table hive_table3:
create table hive_table3(id number,age number)
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION 'hdfs:/path/'
TBLPROPERTIES ( 'spark.sql.sources ....)
>
> hive_table.pl hive_table.txt2
create table hive_table(id number,age number)
stored as orc
create table hive_table2(id number,age number)
stored as orc
create table hive_table3(id number,age number)
stored as orc
>
Скрипт:
> cat hive_table.pl
#!/usr/bin/perl
# Reduce a dump of `show create table` output to bare
# "create table ... stored as orc" statements, one per table.
#
# Usage: hive_table.pl <file>
#   <file>  text containing, for every table, a "show create table <name>:"
#           header line followed by the "create table ..." statement and its
#           storage clauses. When omitted, the text is read from STDIN.
use strict;
use warnings;

my $file = $ARGV[0];

# Read the input directly instead of shelling out to `cat`: the file name is
# then never interpreted by a shell, so spaces and metacharacters are safe.
my $fh;
if (defined $file) {
    open($fh, '<', $file) or die "cannot open '$file': $!\n";
} else {
    $fh = \*STDIN;
}
my $x = do { local $/; <$fh> };    # slurp the whole input at once
close($fh) if defined $file;
$x = '' unless defined $x;

# Each pass consumes one table. The match is case-insensitive (/i) and `.`
# also matches newlines (/s):
#   $1 - at least one character preceding the first "create table"
#   $2 - from the first "create table" up to the second one (in the sample
#        input this is the tail of the "show create table <name>:" header)
#   $3 - the second "create table ..." text, up to the first clause keyword
#   $4 - the clause keyword that ends the statement
#   $5 - everything after that keyword; becomes the input of the next pass
while ($x =~ m/(.+?)(create table.+?)(CREATE TABLE.+?)(PARTITIONED BY|STORED AS INPUTFORMAT|ROW FORMAT SERDE|OUTPUTFORMAT|LOCATION|TBLPROPERTIES)(.*)/is)
{
    $x = $5;
    my $table_desc = $3;
    print "$table_desc stored as orc\n";
}
>
Правка 1:
> cat hive_table.pl
#!/usr/bin/perl
# Reduce a list of Hive "create table ..." statements to bare
# "create table ... stored as orc" statements, one per table.
#
# Usage: hive_table.pl <file>
#   <file>  text containing "create table ..." statements, each followed by
#           its storage clauses. When omitted, the text is read from STDIN.
use strict;
use warnings;

my $file = $ARGV[0];

# Read the input directly instead of shelling out to `cat`: the file name is
# then never interpreted by a shell, so spaces and metacharacters are safe.
my $fh;
if (defined $file) {
    open($fh, '<', $file) or die "cannot open '$file': $!\n";
} else {
    $fh = \*STDIN;
}
my $x = do { local $/; <$fh> };    # slurp the whole input at once
close($fh) if defined $file;
$x = '' unless defined $x;

# Each pass consumes one table. The match is case-insensitive (/i) and `.`
# also matches newlines (/s):
#   $1 - anything preceding the statement; `.*?` may be empty, so a
#        statement at the very beginning of the input is not skipped
#   $2 - the "create table ..." text, up to the first clause keyword
#   $3 - the clause keyword that ends the statement
#   $4 - everything after that keyword; becomes the input of the next pass
while ($x =~ m/(.*?)(create table.+?)(PARTITIONED BY|STORED AS INPUTFORMAT|ROW FORMAT SERDE|OUTPUTFORMAT|LOCATION|TBLPROPERTIES)(.*)/is)
{
    $x = $4;
    my $table_desc = $2;
    print "$table_desc stored as orc\n";
}
>
Правка 2:
> cat hive_table.txt3
create table hive_table(id number,age number)
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
LOCATION 'hdfs:/path/'
TBLPROPERTIES ( 'spark.sql.sources ....)
create table hive_table2(id number,age number)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION 'hdfs:/path/'
TBLPROPERTIES ( 'spark.sql.sources ....)
create table hive_table3(id number,age number)
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION 'hdfs:/path/'
TBLPROPERTIES ( 'spark.sql.sources ..)
> hive_table.pl hive_table.txt3
create table hive_table(id number,age number)
stored as orc
create table hive_table2(id number,age number)
stored as orc
create table hive_table3(id number,age number)
stored as orc
> cat hive_table.pl
#!/usr/bin/perl
# Reduce a list of Hive "create table ..." statements to bare
# "create table ... stored as orc" statements, one per table.
#
# Usage: hive_table.pl <file>
#   <file>  text containing "create table ..." statements, each followed by
#           its storage clauses. When omitted, the text is read from STDIN.
use strict;
use warnings;

my $file = $ARGV[0];

# Read the input directly instead of shelling out to `cat`: the file name is
# then never interpreted by a shell, so spaces and metacharacters are safe.
my $fh;
if (defined $file) {
    open($fh, '<', $file) or die "cannot open '$file': $!\n";
} else {
    $fh = \*STDIN;
}
my $x = do { local $/; <$fh> };    # slurp the whole input at once
close($fh) if defined $file;
$x = '' unless defined $x;

# Pad both ends with a sentinel. The leading pad supplies the one character
# that `(.+?)` requires, so a statement at the very start of the input is
# still matched. The trailing pad follows every clause keyword, so it only
# ever lands in the unprinted remainder.
$x = "dummy" . $x . "dummy";

# Each pass consumes one table. The match is case-insensitive (/i) and `.`
# also matches newlines (/s):
#   $1 - at least one character preceding the statement
#   $2 - the "create table ..." text, up to the first clause keyword
#   $3 - the clause keyword that ends the statement
#   $4 - everything after that keyword; becomes the input of the next pass
while ($x =~ m/(.+?)(create table.+?)(PARTITIONED BY|STORED AS INPUTFORMAT|ROW FORMAT SERDE|OUTPUTFORMAT|LOCATION|TBLPROPERTIES)(.*)/is)
{
    $x = $4;
    my $table_desc = $2;
    print "$table_desc stored as orc\n";
}
>