После импорта мой набор данных выглядит следующим образом:
Classes ‘data.table’ and 'data.frame': 820600 obs. of 2180 variables:
$ count_comments : int 0 0 0 0 0 2 2 0 0 1 ...
$ count_faves : int 5 2 2 15 1 3 19 5 1 4 ...
$ dateadded : int 1530174689 1530174688 1530174687 1530162494 1530159458 1530158648 1530158074 1529994404 1529992211 1529868922 ...
$ datetaken : chr "2018-05-10 15:50:59" "2018-05-10 15:50:53" "2018-05-10 15:50:03" "2006-11-27 00:00:00" ...
$ dateupload : int 1530174672 1530174671 1530174669 1498275521 1436228321 1482723483 1496706006 1529994381 1529992197 1529868901 ...
$ group_url : chr "https://www.flickr.com/groups/capriceclassic/" "https://www.flickr.com/groups/capriceclassic/" "https://www.flickr.com/groups/capriceclassic/" "https://www.flickr.com/groups/capriceclassic/" ...
$ id :integer64 42341316794 42341318944 42341324184 35456820766 19292939750 31070311463 34738418140 42964602432 ...
$ license : int 6 6 6 0 0 0 0 6 0 6 ...
$ oid.800metres : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Abbey : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Abdomen : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Academicconference : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Academicdress : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Accipitriformes : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Acousticguitar : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Acoustic-electricguitar : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Acrylicpaint : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Actionfigure : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Adolescent : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Adult : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Adventure : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Advertising : num 0 0 0 0 0 ...
$ oid.Aeolianlandform : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aerialphotography : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aerobatics : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aerospaceengineering : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Africanelephant : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Afterglow : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Agaric : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Agaricaceae : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Agaricus : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Agriculturalmachinery : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Agriculture : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airforce : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airracing : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airshow : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airsports : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airtravel : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airbusa320family : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aircraft : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aircraftcabin : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aircraftcarrier : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aircraftengine : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airline : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airliner : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airplane : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airport : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airportapron : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Airportterminal : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aisle : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Albumcover : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Alcohol : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Alcoholicbeverage : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Ale : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Algae : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Alley : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Alligator : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Alloywheel : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.All-terrainvehicle : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Alps : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Altar : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Amateurboxing : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Amateurwrestling : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Ambulance : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Americanfootball : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Amphibian : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Amphibiousassaultship : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Amphibioustransportdock : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Amphitheatre : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Amusementpark : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Amusementride : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Ancientgreektemple : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Ancienthistory : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Ancientromanarchitecture : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Ancientrome : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Animal : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Animalmigration : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Animalshelter : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Animalsports : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Animaltraining : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Anime : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Annualplant : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Ant : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Antelope : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Antique : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Antiquecar : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Apartment : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Ape : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Apple : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aqua : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aquarium : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aquaticplant : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Aqueduct : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Arabiancamel : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Arcade : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Arch : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Archbridge : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Archaeologicalsite : num 0 0 0 0 0 0 0 0 0 0 ...
$ oid.Archipelago : num 0 0 0 0 0 0 0 0 0 0 ...
[list output truncated]
- attr(*, ".internal.selfref")=<externalptr>
После этого я импортирую второй набор данных со следующей структурой:
Classes ‘data.table’ and 'data.frame': 2333 obs. of 4 variables:
$ Labels : chr "oid.800 metres" "oid.Abbey" "oid.Abdomen" "oid.Academic conference" ...
$ CodingCategory: chr "Unclear" "UrbanContext" "HumanBodyParts" "Unclear" ...
$ Occurrences : num 28 286 666 5925 569 ...
$ Occurrences % : num 0.00341 0.03485 0.08116 0.72203 0.06934 ...
- attr(*, ".internal.selfref")=<externalptr>
>
Как вы можете видеть, каждый ярлык "oid.XXX" назначен одной из 12 категорий кодирования: - UrbanContext - HumanBodyParts - Животные - SportsContext - NatureContex ...
Вот фрагмент из первого набора данных :
count_comments count_faves dateadded datetaken dateupload group_url id license oid.800metres
1: 0 5 1530174689 2018-05-10 15:50:59 1530174672 https://www.flickr.com/groups/capriceclassic/ 42341316794 6 0
2: 0 2 1530174688 2018-05-10 15:50:53 1530174671 https://www.flickr.com/groups/capriceclassic/ 42341318944 6 0
3: 0 2 1530174687 2018-05-10 15:50:03 1530174669 https://www.flickr.com/groups/capriceclassic/ 42341324184 6 0
4: 0 15 1530162494 2006-11-27 00:00:00 1498275521 https://www.flickr.com/groups/capriceclassic/ 35456820766 0 0
5: 0 1 1530159458 2007-05-17 12:38:02 1436228321 https://www.flickr.com/groups/capriceclassic/ 19292939750 0 0
6: 2 3 1530158648 2013-09-26 18:02:39 1482723483 https://www.flickr.com/groups/capriceclassic/ 31070311463 0 0
7: 2 19 1530158074 2017-05-13 10:15:45 1496706006 https://www.flickr.com/groups/capriceclassic/ 34738418140 0 0
8: 0 5 1529994404 2018-05-10 15:09:55 1529994381 https://www.flickr.com/groups/capriceclassic/ 42964602432 6 0
9: 0 1 1529992211 2017-06-26 02:25:33 1529992197 https://www.flickr.com/groups/capriceclassic/ 28146093407 0 0
10: 1 4 1529868922 2018-05-10 13:42:01 1529868901 https://www.flickr.com/groups/capriceclassic/ 42984216801 6 0
oid.Abbey oid.Abdomen oid.Academicconference oid.Academicdress oid.Accipitriformes oid.Acousticguitar oid.Acoustic-electricguitar oid.Acrylicpaint
1: 0 0 0 0 0 0 0 0
2: 0 0 0 0 0 0 0 0
3: 0 0 0 0 0 0 0 0
4: 0 0 0 0 0 0 0 0
5: 0 0 0 0 0 0 0 0
6: 0 0 0 0 0 0 0 0
7: 0 0 0 0 0 0 0 0
8: 0 0 0 0 0 0 0 0
9: 0 0 0 0 0 0 0 0
10: 0 0 0 0 0 0 0 0
oid.Actionfigure oid.Adolescent oid.Adult
1: 0 0 0
2: 0 0 0
3: 0 0 0
4: 0 0 0
5: 0 0 0
6: 0 0 0
7: 0 0 0
8: 0 0 0
9: 0 0 0
10: 0 0 0
Вот фрагмент из второго набора данных :
Labels CodingCategory Occurrences Occurrences %
1: oid.800 metres Unclear 28 0.003412137
2: oid.Abbey UrbanContext 286 0.034852547
3: oid.Abdomen HumanBodyParts 666 0.081160127
4: oid.Academic conference Unclear 5925 0.722032659
5: oid.Academic dress <NA> 569 0.069339508
6: oid.Academicconference Unclear NA NA
7: oid.Academicdress <NA> NA NA
8: oid.Accipitriformes Animals 19 0.002315379
9: oid.Acoustic guitar <NA> 329 0.040092615
10: oid.Acoustic-electric guitar <NA> 43 0.005240068
11: oid.Acrylic paint <NA> 735 0.089568608
12: oid.Acrylicpaint Unclear NA NA
13: oid.Action figure <NA> 3650 0.444796490
14: oid.Actionfigure <NA> NA NA
15: oid.Adolescent HumanBodyParts 1123 0.136851085
16: oid.Adult HumanBodyParts 983 0.119790397
17: oid.Adventure <NA> 17603 2.145137704
18: oid.Advertising <NA> 46194 5.629295637
19: oid.Aeolian landform NatureContext 5911 0.720326590
20: oid.Aeolianlandform NatureContext NA NA
21: oid.Aerial photography Unclear 10382 1.265171825
22: oid.Aerialphotography Unclear NA NA
23: oid.Aerobatics SportsContext 224 0.027297100
24: oid.Aerospace engineering <NA> 1526 0.185961492
25: oid.Aerospaceengineering <NA> NA NA
26: oid.African elephant Animals 7 0.000853034
27: oid.Afterglow NatureContext 1998 0.243480380
28: oid.Agaric NatureContext 1 0.000121862
29: oid.Agaricaceae NatureContext 1 0.000121862
30: oid.Agaricus NatureContext 1 0.000121862
31: oid.Agricultural machinery CarType 34249 4.173653424
32: oid.Agriculture <NA> 12034 1.466487936
33: oid.Air force <NA> 8143 0.992322691
34: oid.Air racing SportsContext 79 0.009627102
35: oid.Air show SportsContext 331 0.040336339
36: oid.Air sports SportsContext 113 0.013770412
37: oid.Air travel <NA> 551 0.067145991
38: oid.Airbus a320 family <NA> 73 0.008895930
39: oid.Aircraft <NA> 20583 2.508286620
40: oid.Aircraft cabin <NA> 3 0.000365586
41: oid.Aircraft carrier <NA> 5 0.000609310
42: oid.Aircraft engine <NA> 59190 7.213014867
43: oid.Airline <NA> 8080 0.984645381
44: oid.Airliner <NA> 1343 0.163660736
45: oid.Airplane <NA> 12043 1.467584694
46: oid.Airport UrbanContext 2386 0.290762856
47: oid.Airport apron <NA> 133 0.016207653
48: oid.Airport terminal UrbanContext 714 0.087009505
49: oid.Airportterminal UrbanContext NA NA
50: oid.Airtravel <NA> NA NA
Сейчас я пытаюсь создать дополнительные столбцы в первом наборе данных, которые дают мне максимальное значение значений "oid.XXX" для всех столбцов "oid.XXX", которые соответствуют определенной категории CodingCategory для каждой строки.,Например: я хочу создать новый столбец с именем «UrbanContext», который содержит максимальное значение для соответствующих значений «Oid.XXX» в конкретной строке.Это то, что я придумал:
require(data.table)
require(QuantPsyc)
library(lmSupport)
library(dbplyr)
#Import header of OID Label Data only
x <- fread("/Users/01_Flickr Car Data/01_Open Image Data Set/01_Raw Data/flickrexport_cars_oid_201903.csv",sep=",", encoding = "Latin-1", header=TRUE, nrows=0)
#Define oid Columns as numeric
colNames <- grep('^oid', names(x), value = TRUE)
colClasses <- rep('numeric', length(colNames))
names(colClasses) <- colNames
#Import OID Label Data based on Open Image Data Set only
flickrcar <- fread("/Users/01_Flickr Car Data/01_Open Image Data Set/01_Raw Data/flickrexport_cars_oid_201903.csv",colClasses = colClasses, sep=",", encoding = "Latin-1", header=TRUE)
str(flickrcar)
#NAs to zeros
f_dowle3 = function(DT) {
for (j in names(DT))
set(DT,which(is.na(DT[[j]])),j,0)
}
f_dowle3(flickrcar)
#Import Coding for Open Image Data Set Labels
flickrcar_label_coding <- fread("/Users/01_Open Image Data Set/01_Raw Data/180220_OID_Labels_Coding_NTT_FINAL.csv", sep=",", header = TRUE)
flickrcar_label_coding[1:50, 1:4]
#Set to Data Table
setDT(flickrcar)
setDT(flickrcar_label_coding)
#Group OID Labels into LabelCategory
humanbodyparts <- flickrcar_label_coding[grep("HumanBodyParts",flickrcar_label_coding$CodingCategory), "Labels"]
urbancontext <- flickrcar_label_coding[grep("UrbanContext",flickrcar_label_coding$CodingCategory), "Labels"]
animals <- flickrcar_label_coding[grep("Animals",flickrcar_label_coding$CodingCategory), "Labels"]
sportscontext <- flickrcar_label_coding[grep("SportsContext",flickrcar_label_coding$CodingCategory), "Labels"]
naturecontext <- flickrcar_label_coding[grep("NaturContext",flickrcar_label_coding$CodingCategory), "Labels"]
exhibitioncontext <- flickrcar_label_coding[grep("ExhibitionContext",flickrcar_label_coding$CodingCategory), "Labels"]
manualimageprocessing <- flickrcar_label_coding[grep("ManualImageProcessing",flickrcar_label_coding$CodingCategory), "Labels"]
regularroadecomtext <- flickrcar_label_coding[grep("RegularRoadContext",flickrcar_label_coding$CodingCategory), "Labels"]
racingcontext <- flickrcar_label_coding[grep("RacingContext",flickrcar_label_coding$CodingCategory), "Labels"]
cartype <- flickrcar_label_coding[grep("CarType",flickrcar_label_coding$CodingCategory), "Labels"]
carparts <- flickrcar_label_coding[grep("CarParts",flickrcar_label_coding$CodingCategory), "Labels"]
carbrand <- flickrcar_label_coding[grep("CarBrand",flickrcar_label_coding$CodingCategory), "Labels"]
#Insert Label Category Columns
flickrcar$HumanBodyParts <- flickrcar[1, which.max(flickrcar[1,humanbodyparts])]
К сожалению, это не работает.
В итоге результат для одной строки должен выглядеть примерно так:
count_comments count_faves dateadded datetaken dateupload group_url id license oid.800metres
1: 0 5 1530174689 2018-05-10 15:50:59 1530174672 https://www.flickr.com/groups/capriceclassic/ 42341316794 6 0
oid.Abbey oid.Abdomen oid.Academicconference oid.Academicdress oid.Accipitriformes oid.Acousticguitar oid.Acoustic-electricguitar oid.Acrylicpaint
1: 0 0 0 0 0 0 0 0
oid.Actionfigure oid.Adolescent oid.Adult NatureContext RoadContext...
1: 0 0 0 0.88 0.54...
Большое спасибо за вашу помощь заранее!