Я использую spark-sql версии 2.4.1 с Java 8 в своём PoC.
У меня есть следующие данные об учениках, сгруппированные по классам (standard/class), как показано ниже:
/**
 * JavaBean describing one student: the class the student belongs to, the
 * student's name, and the marks obtained in four papers.
 *
 * <p>The class follows the JavaBean convention (public no-arg constructor plus
 * a getter/setter pair per property) so that bean-based tooling such as
 * {@code Encoders.bean(Student.class)} can both read instances and create new
 * ones reflectively.
 */
public static class Student implements Serializable {

    private String className;
    private String studentName;
    private Integer paperOneMarks;
    private Integer paperTwoMarks;
    private Integer paperThreeMarks;
    private Integer paperFourMarks;

    /**
     * Creates an empty student; every property is {@code null} until set.
     *
     * <p>Needed by the JavaBean convention: without it, code that instantiates
     * the bean reflectively and then populates it through the setters has no
     * constructor it can call.
     */
    public Student() {
    }

    /**
     * Creates a fully populated student.
     *
     * @param className       name of the class/standard, e.g. {@code "4th-Class"}
     * @param studentName     name of the student
     * @param paperOneMarks   marks in paper one
     * @param paperTwoMarks   marks in paper two
     * @param paperThreeMarks marks in paper three
     * @param paperFourMarks  marks in paper four
     */
    public Student(String className, String studentName, Integer paperOneMarks, Integer paperTwoMarks,
            Integer paperThreeMarks, Integer paperFourMarks) {
        this.className = className;
        this.studentName = studentName;
        this.paperOneMarks = paperOneMarks;
        this.paperTwoMarks = paperTwoMarks;
        this.paperThreeMarks = paperThreeMarks;
        this.paperFourMarks = paperFourMarks;
    }

    /** @return the class/standard name */
    public String getClassName() {
        return className;
    }

    /** @param className the class/standard name to set */
    public void setClassName(String className) {
        this.className = className;
    }

    /** @return the student's name */
    public String getStudentName() {
        return studentName;
    }

    /** @param studentName the student's name to set */
    public void setStudentName(String studentName) {
        this.studentName = studentName;
    }

    /** @return marks in paper one */
    public Integer getPaperOneMarks() {
        return paperOneMarks;
    }

    /** @param paperOneMarks marks in paper one */
    public void setPaperOneMarks(Integer paperOneMarks) {
        this.paperOneMarks = paperOneMarks;
    }

    /** @return marks in paper two */
    public Integer getPaperTwoMarks() {
        return paperTwoMarks;
    }

    /** @param paperTwoMarks marks in paper two */
    public void setPaperTwoMarks(Integer paperTwoMarks) {
        this.paperTwoMarks = paperTwoMarks;
    }

    /** @return marks in paper three */
    public Integer getPaperThreeMarks() {
        return paperThreeMarks;
    }

    /** @param paperThreeMarks marks in paper three */
    public void setPaperThreeMarks(Integer paperThreeMarks) {
        this.paperThreeMarks = paperThreeMarks;
    }

    /** @return marks in paper four */
    public Integer getPaperFourMarks() {
        return paperFourMarks;
    }

    /** @param paperFourMarks marks in paper four */
    public void setPaperFourMarks(Integer paperFourMarks) {
        this.paperFourMarks = paperFourMarks;
    }
}
// Sample rows: (class name, student name, marks in papers one..four).
final Student[] sampleRows = {
    // 4th-Class
    new Student("4th-Class", "Kiran", 23, 19, 26, 22),
    new Student("4th-Class", "Peter", 32, 28, 21, 31),
    new Student("4th-Class", "John", 21, 27, 26, 33),
    new Student("4th-Class", "Alex", 17, 28, 25, 34),
    // 3rd-Class
    new Student("3rd-Class", "Tony", 32, 17, 26, 22),
    new Student("3rd-Class", "Fred", 19, 30, 25, 34),
    new Student("3rd-Class", "Danny", 27, 28, 31, 30),
    new Student("3rd-Class", "Sunny", 30, 31, 26, 21),
    // 2nd-Class
    new Student("2nd-Class", "Stella", 19, 23, 22, 30),
    new Student("2nd-Class", "Diya", 33, 28, 26, 17),
    new Student("2nd-Class", "Amber", 32, 17, 25, 21),
    new Student("2nd-Class", "Tanvish", 27, 28, 33, 23),
    new Student("2nd-Class", "April", 32, 22, 26, 34),
    // 1st-Class
    new Student("1st-Class", "Maria", 27, 28, 22, 34),
    new Student("1st-Class", "Justin", 30, 31, 19, 23),
    new Student("1st-Class", "Peter", 32, 28, 18, 34),
    new Student("1st-Class", "Anny", 22, 25, 26, 21),
    new Student("1st-Class", "Kim", 19, 28, 32, 30),
    new Student("1st-Class", "Akio", 17, 33, 26, 27)
};

// Fixed-size list view over the sample rows.
List<Student> data = Arrays.asList(sampleRows);

// Bean encoder for Student, then the typed Dataset built from the local list.
Encoder<Student> dataEncoder = Encoders.bean(Student.class);
Dataset<Student> ds = spark.createDataset(data, dataEncoder);
При каждом вызове я получаю список классов (classNames), например «2nd-Class», «3rd-Class», и для каждого из них мне нужно найти ученика, занявшего 1-е место. Это лишь примерные данные: в дальнейшем я буду получать сотни классов/названий, т. е. для каждого учебного заведения. Поэтому мне нужно выполнять это параллельно.
Как запустить/рассчитать это параллельно, т. е. обработать много классов за один проход? Как это сделать?