Не могу поверить, что никто до сих пор не выложил библиотеку для этой задачи. Я поискал на GitHub и нашёл парсер имён на JavaScript, который несложно перенести на Java: https://github.com/joshfraser/JavaScript-Name-Parser
Кроме того, я доработал код из одного из ответов, чтобы он работал немного лучше, и добавил к нему тест:
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
/**
 * Splits a free-form personal name into leading titles, first name, middle
 * name(s), last name and trailing titles.
 *
 * <p>The name is tokenised on whitespace. Dotted honorifics in front of the
 * first name ("Mr.", "Dr.") become leading titles, the first remaining token
 * is the first name, known suffixes at the end ("Jr.", "IV", "Ph.D.") become
 * trailing titles, the token before them is the last name and everything in
 * between is the middle name.
 *
 * <p>An instance may be reused: every call to {@link #parse(String)} discards
 * the result of the previous call.
 */
public class NameParser {

    /** Honorifics accepted before the first name; matched case-insensitively as whole words. */
    private static final String[] PREFIXES = { "dr", "mr", "ms", "atty", "prof", "miss", "mrs" };

    /** Generational and professional suffixes; matched case-insensitively as whole words. */
    private static final String[] SUFFIXES = { "jr", "sr", "ii", "iii", "iv", "v", "vi", "vii", "viii",
            "esq", "esquire", "2nd", "3rd", "jd", "phd", "md", "cpa" };

    private String firstName = "";
    private String lastName = "";
    private String middleName = "";
    private List<String> titlesBefore = new ArrayList<String>();
    private List<String> titlesAfter = new ArrayList<String>();

    /** Creates a parser with all name parts empty. */
    public NameParser() {
    }

    /**
     * Creates a parser and immediately parses {@code name}.
     *
     * @param name the full name to parse; {@code null} or blank leaves all parts empty
     */
    public NameParser(String name) {
        parse(name);
    }

    /** Clears every parsed part; fresh lists are created so lists handed out earlier are not modified. */
    private void reset() {
        firstName = "";
        lastName = "";
        middleName = "";
        titlesBefore = new ArrayList<String>();
        titlesAfter = new ArrayList<String>();
    }

    /**
     * Parses {@code name} and replaces any previously parsed result.
     *
     * @param name the full name to parse; {@code null} or blank leaves all parts empty
     */
    public void parse(String name) {
        // Reset first so a reused parser never reports the previous name for blank input.
        reset();
        if (isBlank(name)) {
            return;
        }

        // Tokens that follow the first name, in order of appearance.
        List<String> remaining = new ArrayList<String>();
        boolean firstNameSeen = false;
        for (String token : name.split("\\s+")) {
            if (isBlank(token)) {
                continue;
            }
            String word = token;
            if (word.endsWith(",")) {
                // "Adams," -> "Adams": the separating comma is not part of the name.
                word = word.substring(0, word.length() - 1);
                if (word.isEmpty()) {
                    continue;
                }
            }
            if (firstNameSeen) {
                remaining.add(word);
            } else if (word.endsWith(".") && isPrefix(word)) {
                // Only dotted honorifics count as titles; an undotted first word is taken as the first name.
                titlesBefore.add(word);
            } else {
                firstName = word;
                firstNameSeen = true;
            }
        }

        // Peel suffixes off the end by index, so an equal token earlier in the list is never touched.
        int last = remaining.size() - 1;
        while (last >= 0 && isSuffix(remaining.get(last))) {
            titlesAfter.add(remaining.remove(last));
            last--;
        }
        if (last >= 0) {
            lastName = remaining.remove(last);
        } else if (!titlesAfter.isEmpty()) {
            // Everything after the first name looked like a suffix: use the one
            // closest to the first name as the last name instead.
            lastName = titlesAfter.remove(titlesAfter.size() - 1);
        }

        middleName = joinWithSpaces(remaining);
    }

    /** Returns true if {@code word}, ignoring trailing punctuation and case, is a known honorific. */
    private static boolean isPrefix(String word) {
        return matchesAny(stripTrailingPunctuation(word), PREFIXES);
    }

    /** Returns true if {@code word}, ignoring dots, trailing commas and case, is a known suffix. */
    private static boolean isSuffix(String word) {
        // Dropping every dot lets dotted degrees such as "Ph.D." or "M.D." match.
        return matchesAny(stripTrailingPunctuation(word).replace(".", ""), SUFFIXES);
    }

    /** Whole-word, case-insensitive lookup; equalsIgnoreCase does not depend on the default locale. */
    private static boolean matchesAny(String candidate, String[] titles) {
        for (String title : titles) {
            if (title.equalsIgnoreCase(candidate)) {
                return true;
            }
        }
        return false;
    }

    /** Removes any run of '.' and ',' characters from the end of {@code word}. */
    private static String stripTrailingPunctuation(String word) {
        int end = word.length();
        while (end > 0 && (word.charAt(end - 1) == '.' || word.charAt(end - 1) == ',')) {
            end--;
        }
        return word.substring(0, end);
    }

    /** Returns true for {@code null}, the empty string, or a string made only of whitespace. */
    private static boolean isBlank(String text) {
        if (text == null) {
            return true;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Joins {@code parts} with single spaces; an empty list yields the empty string. */
    private static String joinWithSpaces(List<String> parts) {
        StringBuilder joined = new StringBuilder();
        for (String part : parts) {
            if (joined.length() > 0) {
                joined.append(' ');
            }
            joined.append(part);
        }
        return joined.toString();
    }

    /** @return the first name, or the empty string if nothing was parsed */
    public String getFirstName() {
        return firstName;
    }

    /** @return the last name, or the empty string if the name had a single word */
    public String getLastName() {
        return lastName;
    }

    /** @return all middle names joined by single spaces, or the empty string if there are none */
    public String getMiddleName() {
        return middleName;
    }

    /** @return the honorifics found before the first name, in order of appearance */
    public List<String> getTitlesBefore() {
        return titlesBefore;
    }

    /** @return the suffixes found after the last name, last one first (reverse order of appearance) */
    public List<String> getTitlesAfter() {
        return titlesAfter;
    }
}
Тест:
import junit.framework.Assert;
import org.junit.Test;
/**
 * Table-driven JUnit test for {@code NameParser}: each row pairs a full name
 * with the first, middle and last name the parser is expected to extract.
 */
public class NameParserTest {

    /** One input name together with its expected parts. */
    private static class TestData {
        final String name;
        final String firstName;
        final String lastName;
        final String middleName;

        TestData(String name, String firstName, String middleName, String lastName) {
            this.name = name;
            this.firstName = firstName;
            this.lastName = lastName;
            this.middleName = middleName;
        }
    }

    /** Parses every sample name with one reused parser and checks all three name parts. */
    @Test
    public void test() {
        TestData[] cases = {
                new TestData("Henry \"Hank\" J. Fasthoff IV", "Henry", "\"Hank\" J.", "Fasthoff"),
                new TestData("April A. (Caminez) Bentley", "April", "A. (Caminez)", "Bentley"),
                new TestData("fff lll", "fff", "", "lll"),
                new TestData("fff mmmmm lll", "fff", "mmmmm", "lll"),
                new TestData("fff mmm1 mm2 lll", "fff", "mmm1 mm2", "lll"),
                new TestData("Mr. Dr. Tom Jones", "Tom", "", "Jones"),
                new TestData("Robert P. Bethea Jr.", "Robert", "P.", "Bethea"),
                new TestData("Charles P. Adams, Jr.", "Charles", "P.", "Adams"),
                new TestData("B. Herbert Boatner, Jr.", "B.", "Herbert", "Boatner"),
                new TestData("Bernard H. Booth IV", "Bernard", "H.", "Booth"),
                new TestData("F. Laurens \"Larry\" Brock", "F.", "Laurens \"Larry\"", "Brock"),
                new TestData("Chris A. D'Amour", "Chris", "A.", "D'Amour") };

        NameParser parser = new NameParser();
        for (TestData expected : cases) {
            parser.parse(expected.name);
            // The message names the input so a failing row can be identified from the report.
            Assert.assertEquals("first name of <" + expected.name + ">", expected.firstName,
                    parser.getFirstName());
            Assert.assertEquals("last name of <" + expected.name + ">", expected.lastName,
                    parser.getLastName());
            Assert.assertEquals("middle name of <" + expected.name + ">", expected.middleName,
                    parser.getMiddleName());
        }
    }
}