Если вы знаете, что имя пользователя всегда стоит на второй позиции в предложении, вы можете извлечь предложения из фрейма данных (DF) в вектор и обработать их следующим кодом:
# Example input: the user name is assumed to be the second word of every sentence.
text <- c("hi john what are you doing",
          "hi sunil what are you doing",
          "hello sanjay what are you doing")

# Split every sentence into its words (one character vector per sentence).
# `fixed = TRUE` treats the separator as a literal space, not a regex.
words_by_sentence <- strsplit(text, " ", fixed = TRUE)

# Build `text2`: each sentence with its second word upper-cased and followed
# by a comma. The result has one element per element of `text`.
text2 <- vapply(words_by_sentence, function(words) {
  # Only touch the name when a second word actually exists; otherwise the
  # sentence is returned unchanged instead of gaining an "NA," token.
  if (length(words) >= 2) {
    words[2] <- paste0(toupper(words[2]), ",")
  }
  # Re-join the words with single spaces to recreate the sentence.
  paste(words, collapse = " ")
}, character(1))
Результат:
#text2
#[1] "hi JOHN, what are you doing" "hi SUNIL, what are you doing"
#[3] "hello SANJAY, what are you doing"
После этого можно воссоздать фрейм данных из полученного вектора `text2`.
В противном случае, если позиция имени пользователя в предложении может меняться, но у вас есть список имён пользователей, которые нужно найти, вы можете проверить, встречается ли в предложении хотя бы одно из них, определить позицию найденного имени, заменить его на вариант в верхнем регистре с запятой и затем воссоздать предложение — либо вывести сообщение об ошибке, если ни одно имя не найдено.
# Known user names to look for; the first one (in this order) that occurs in
# a sentence is the one that gets replaced.
usernames <- c("john", "sunil", "sanjay")
text <- c("hi john what are you doing",
          "hi sunil what are you doing",
          "hello sanjay what are you doing",
          "hello ciao how are you"
)

# One slot per input sentence; slots of sentences without a known user name
# stay NA and are dropped after the loop. Initialising here (rather than on
# the first sentence) keeps the result correct even when the first sentence
# contains no user name.
text2 <- rep(NA_character_, length(text))

for (i in seq_along(text)) {
  sentence <- text[i]
  # Separate the words of the sentence.
  words <- strsplit(sentence, " ", fixed = TRUE)[[1]]
  # First known user name present in the sentence, or NA if there is none.
  user_present <- usernames[usernames %in% words][1]

  if (is.na(user_present)) {
    # No user name in this sentence: report it and move on.
    err.msg <- paste("NO username found in sentence: ", sentence)
    print(err.msg)
    next
  }

  # Position(s) of the user name within the sentence.
  pos <- which(words == user_present)
  # Upper-case the name and put a comma after it, at the position where it
  # was actually found (not at a fixed index).
  words[pos] <- paste0(toupper(words[pos]), ",")
  # Re-join the words with single spaces to recreate the sentence.
  text2[i] <- paste(words, collapse = " ")
}

# Keep only the sentences in which a user name was found.
text2 <- text2[!is.na(text2)]
Результат:
#[1] "NO username found in sentence: hello ciao how are you"
text2
#[1] " hi JOHN, what are you doing" " hi SUNIL, what are you doing"
#[3] " hello SANJAY, what are you doing"
Надеюсь, это поможет!
###END