Мои данные можно загрузить следующим образом:
# Load the TidyTuesday release for 2020, week 4, with tidytuesdayR and keep
# its `spotify_songs` element as `spotify`.
tuesdata <- tidytuesdayR::tt_load(2020, week = 4)
spotify <- tuesdata$spotify_songs
Я хочу получить track_name без скобок () и без текста внутри них.
Ожидаемый результат: если трек называется RITMO (Bad Boys For Life), я хочу, чтобы в названии осталось только RITMO.
Содержимое скобок, например Bad Boys For Life, можно вынести в отдельный столбец feat. Я пытаюсь сделать это с помощью кода ниже; просмотрел 10–20 вопросов на SO, но так и не смог разобраться.
# Split track_name into the title before the first "(" (track_name2) and the
# text inside that first pair of parentheses (feat); track_name itself is kept.
# tidyr::extract() is called explicitly so magrittr::extract() cannot mask it.
spotify %>%
  tidyr::extract(
    track_name,
    into = c("track_name2", "feat"),
    # Group 1: shortest run of non-"(" characters; the spaces before "(" are
    # consumed outside the group. Then either "(" or the end of the string, so
    # names without parentheses still match. Group 2: everything up to the
    # closing ")" -- an empty string when the name has no parentheses.
    regex = "^([^(]*?)\\s*(?:\\(|$)([^)]*)",
    remove = FALSE
  )
Я понимаю, что с моим регулярным выражением что-то не так, но не знаю, как получить ожидаемый результат.
Добавляю вывод dput для spotify:
# dput() output for a 6-row sample of `spotify`: a tibble (classes "tbl_df",
# "tbl", "data.frame") with 23 columns, usable as a reproducible example.
structure(list(track_id = c("6f807x0ima9a1j3VPbc7VN", "0r7CVbZTWZgbTCYdfa2P31",
"1z1Hg7Vb0AhHDiEmnDE79l", "75FpbthrwQmzHlBJLuGdC7", "1e8PAfcKUYoKkxPhrHqw4x",
"7fvUMiyapMsRRxr07cU8Ef"), track_name = c("I Don't Care (with Justin Bieber) - Loud Luxury Remix",
"Memories - Dillon Francis Remix", "All the Time - Don Diablo Remix",
"Call You Mine - Keanu Silva Remix", "Someone You Loved - Future Humans Remix",
"Beautiful People (feat. Khalid) - Jack Wins Remix"), track_artist = c("Ed Sheeran",
"Maroon 5", "Zara Larsson", "The Chainsmokers", "Lewis Capaldi",
"Ed Sheeran"), track_popularity = c(66, 67, 70, 60, 69, 67),
track_album_id = c("2oCs0DGTsRO98Gh5ZSl2Cx", "63rPSO264uRjW1X5E6cWv6",
"1HoSmj2eLcsrR0vE9gThr4", "1nqYsOef1yKKuGOVchbsk6", "7m7vv9wlQ4i0LFuJiE2zsQ",
"2yiy9cd2QktrNvWC2EUi0k"), track_album_name = c("I Don't Care (with Justin Bieber) [Loud Luxury Remix]",
"Memories (Dillon Francis Remix)", "All the Time (Don Diablo Remix)",
"Call You Mine - The Remixes", "Someone You Loved (Future Humans Remix)",
"Beautiful People (feat. Khalid) [Jack Wins Remix]"), track_album_release_date = c("2019-06-14",
"2019-12-13", "2019-07-05", "2019-07-19", "2019-03-05", "2019-07-11"
), playlist_name = c("Pop Remix", "Pop Remix", "Pop Remix",
"Pop Remix", "Pop Remix", "Pop Remix"), playlist_id = c("37i9dQZF1DXcZDD7cfEKhW",
"37i9dQZF1DXcZDD7cfEKhW", "37i9dQZF1DXcZDD7cfEKhW", "37i9dQZF1DXcZDD7cfEKhW",
"37i9dQZF1DXcZDD7cfEKhW", "37i9dQZF1DXcZDD7cfEKhW"), playlist_genre = c("pop",
"pop", "pop", "pop", "pop", "pop"), playlist_subgenre = c("dance pop",
"dance pop", "dance pop", "dance pop", "dance pop", "dance pop"
), danceability = c(0.748, 0.726, 0.675, 0.718, 0.65, 0.675
), energy = c(0.916, 0.815, 0.931, 0.93, 0.833, 0.919), key = c(6,
11, 1, 7, 1, 8), loudness = c(-2.634, -4.969, -3.432, -3.778,
-4.672, -5.385), mode = c(1, 1, 0, 1, 1, 1), speechiness = c(0.0583,
0.0373, 0.0742, 0.102, 0.0359, 0.127), acousticness = c(0.102,
0.0724, 0.0794, 0.0287, 0.0803, 0.0799), instrumentalness = c(0,
0.00421, 2.33e-05, 9.43e-06, 0, 0), liveness = c(0.0653,
0.357, 0.11, 0.204, 0.0833, 0.143), valence = c(0.518, 0.693,
0.613, 0.277, 0.725, 0.585), tempo = c(122.036, 99.972, 124.008,
121.956, 123.976, 124.982), duration_ms = c(194754, 162600,
176616, 169093, 189052, 163049)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -6L))