Например, у меня есть файл схемы Avro (.avsc), показанный ниже:
[{
"type": "record",
"namespace": "com.example",
"name": "Customer",
"fields": [
{"name": "first_name", "type": "string", "doc": "Имя клиента"},
{"name": "last_name", "type": "string", "doc": "Last Name of Customer"},
{"name": "age", "type": "int", "doc": "Возраст на момент регистрации"},
{"name": "height", "type": "float", "doc": "Рост на момент регистрации в см"},
{"name": "weight", "type": "float", "doc": "Вес на момент регистрации в кг"},
{"name": "automated_email", "type": "boolean", "default": true, "doc": "Поле, указывающее, подписан ли пользователь на маркетинговые электронные письма"}
]
},
{
"type": "record",
"namespace": "com.example",
"name": "Customers",
"fields": [
{"name": "Customers", "type": {"type": "array", "items": "com.example.Customer"}, "doc": "Список клиентов"}
]
}]
Я создаю пару объектов Customer и добавляю их в объект Customers:
// Configure one builder per customer; the setter calls are chained on each builder.
Customer.Builder customerBuilder1 = Customer.newBuilder()
    .setAge(30)
    .setFirstName("Mark")
    .setLastName("Simpson")
    .setAutomatedEmail(true)
    .setHeight(180f)
    .setWeight(90f);
Customer.Builder customerBuilder2 = Customer.newBuilder()
    .setAge(30)
    .setFirstName("Vishant")
    .setLastName("Shah")
    .setAutomatedEmail(true)
    .setHeight(181f)
    .setWeight(65f);

// Build each record and print it so the written data can be compared with what is read back.
Customer customer1 = customerBuilder1.build();
System.out.println("Original : " + customer1);
Customer customer2 = customerBuilder2.build();
System.out.println("Original : " + customer2);

// Wrap both records in the enclosing Customers record (its single array field).
Customers.Builder customersBuilder = Customers.newBuilder()
    .setCustomers(Arrays.asList(customer1, customer2));
Customers customers = customersBuilder.build();
//Write parquet file
try (ParquetWriter<Customers> writer = AvroParquetWriter
.<Customers>builder(new Path("customers-specific.parquet"))
.withSchema(customers.getSchema())
.withConf(new Configuration())
.withCompressionCodec(CompressionCodecName.SNAPPY)
.build()) {
writer.write(customers);
}
Как применить предикат к полю first_name объектов Customer, находящихся внутри массива? Для плоской схемы (без вложенного объекта) это было бы просто, но для массива такой предикат не работает.
// Read the Parquet file back, asking the reader to keep only records matching the predicate.
// NOTE(review): "first_name" is not a top-level column of Customers; it sits inside the
// repeated "Customers" array, so the column path presumably has to reflect that nesting
// (e.g. "Customers.array.first_name" with parquet-avro's legacy list layout) - TODO confirm,
// and confirm that the Parquet version in use accepts predicates on repeated columns at all.
FilterPredicate predicate = eq(binaryColumn("first_name"), Binary.fromString("Vishant"));
// The reader's element type must be Customers (the type passed to builder()), and the path
// must name the same file the writer produced: "customers-specific.parquet".
try (ParquetReader<Customers> selectiveReader = AvroParquetReader
    .<Customers>builder(new Path("customers-specific.parquet"))
    .withFilter(FilterCompat.get(predicate))
    .build()) {
  Customers selectedCustomers;
  // Keep reading until read() yields null.
  while ((selectedCustomers = selectiveReader.read()) != null) {
    System.out.println("Selected Read" + selectedCustomers.toString());
  }
}