Найти и удалить столбцы на основе атрибутов во фрейме данных - PullRequest
0 голосов
/ 15 февраля 2019

В приведённых ниже данных у каждого столбца есть встроенные атрибуты, и я хочу проверить, есть ли у столбца атрибут attr(, "SpotfireColumnMetaData")$DP.UniqueId.

Если столбец имеет этот атрибут, то я хочу сохранить его во фрейме данных, в противном случае я хочу удалить столбцы, не имеющие этого конкретного атрибута.

Существует ли эффективный способ достижения этого, кроме использования цикла for?

Данные

# Example data as a structure() literal: a one-row data.frame with 13 columns.
# Every column carries a "SpotfireColumnMetaData" attribute (a named list);
# only the last column, `ate_data[, 15]`, has a DP.UniqueId entry in that list.
# NOTE(review): the value is not assigned to a name here, while the answer
# snippets further down refer to a data frame called df1 -- confirm and assign.
structure(list(MIR.tst_temp = structure(NA_character_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "", DP.TestName = "MIR.tst_temp", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.part_id = structure("1", SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "", DP.TestName = "PRR.part_id", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.part_id.count = structure(0L, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "", DP.TestName = "PRR.part_id.count", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.hard_bin = structure(21L, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "bin", DP.TestName = "PRR.hard_bin", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.soft_bin = structure(2100L, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "bin", DP.TestName = "PRR.soft_bin", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), WIR.wafer_id = structure(NA_character_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "wafer", DP.TestName = "WIR.wafer_id", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), MIR.lot_id = structure(NA_character_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "lot", DP.TestName = "MIR.lot_id", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.x_coord = structure(NA_integer_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "diex", DP.TestName = "PRR.x_coord", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.y_coord = structure(NA_integer_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "diey", DP.TestName = "PRR.y_coord", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), PRR.site_num = structure(0L, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "site", DP.TestName = "PRR.site_num", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), WRR.site_grp = structure(NA_integer_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "", DP.TestName = "WRR.site_grp", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), WRR.head_num = structure(NA_integer_, SpotfireColumnMetaData = list(
    DP.TestNumber = "Index", DP.Type = "", DP.TestName = "WRR.head_num", 
    DP.Info = "", DP.TestUnit = "", DP.Statistic = "", DP.Program = "", 
    DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, Limits.Prod.Lower = -Inf, 
    Limits.Prod.Target = NaN, Limits.Prod.Upper = Inf, Limits.Spec.Lower = -Inf, 
    Limits.Spec.Target = NaN, Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, 
    Limits.Outlier.Target = NaN, Limits.Outlier.Upper = Inf, 
    Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, Limits.Whatif.Upper = Inf, 
    DP.ParamType = "PARAMETRIC", DP.BlockId = "", DP.Scratch = "", 
    DP.ColumnId = "", Dp.BaseName = "")), `ate_data[, 15]` = structure(-3.862381e-08, SpotfireColumnMetaData = list(
    DP.TestNumber = "13001", DP.Type = "", DP.TestName = "gross_idd_dcvs vdd3v0 14.a302", 
    DP.Info = "PTR.result", DP.TestUnit = "A", DP.Statistic = "raw", 
    DP.Program = "", DP.ScaleFactor = 0L, DP.FilteredOutCells = 0L, 
    Limits.Prod.Lower = -1.04e-07, Limits.Prod.Target = NaN, 
    Limits.Prod.Upper = 1.58e-06, Limits.Spec.Lower = -Inf, Limits.Spec.Target = NaN, 
    Limits.Spec.Upper = Inf, Limits.Outlier.Lower = -Inf, Limits.Outlier.Target = NaN, 
    Limits.Outlier.Upper = Inf, Limits.Whatif.Lower = -Inf, Limits.Whatif.Target = NaN, 
    Limits.Whatif.Upper = Inf, DP.ParamType = "PARAMETRIC", DP.BlockId = "", 
    DP.Scratch = "", DP.ColumnId = "", Dp.BaseName = "", DP.FTR.testtxt = "", 
    DP.PTR.testtxt = "gross_idd_dcvs VDD3V0 14.a302", DP.DTR.textdat = "", 
    DP.MPR.pinnum = "0", DP.UniqueId = "Start"))), class = "data.frame", row.names = c(NA, 
-1L))

1 Ответ

0 голосов
/ 15 февраля 2019

Один из вариантов - пройти по столбцам с помощью sapply, чтобы создать логический индекс для отбора (subsetting) столбцов

# Build one logical flag per column of df1: TRUE when the column's
# "SpotfireColumnMetaData" attribute list contains a DP.UniqueId entry.
# vapply() is used instead of sapply() so the result is always a logical
# vector -- sapply() returns list() for a zero-column data frame, which
# would make the subsetting below fail.
i1 <- vapply(
  df1,
  function(x) {
    # exact = TRUE and [[ ]] avoid partial matching of attribute/list names.
    meta <- attr(x, "SpotfireColumnMetaData", exact = TRUE)
    length(meta[["DP.UniqueId"]]) > 0
  },
  logical(1)
)
# Keep only the flagged columns; single-bracket indexing returns a data frame.
df1[i1]

или с помощью Filter с is.null

# Base-R alternative: Filter() keeps the columns of df1 for which the
# predicate is TRUE. The predicate is the negation of "DP.UniqueId is NULL
# inside the column's SpotfireColumnMetaData attribute".
Filter(
  Negate(function(column) {
    is.null(attr(column, "SpotfireColumnMetaData")$DP.UniqueId)
  }),
  df1
)

В tidyverse для этого есть функция keep

library(dplyr)
library(purrr)
# keep() retains the columns of df1 for which the predicate is TRUE, i.e.
# those whose SpotfireColumnMetaData attribute has a non-NULL DP.UniqueId.
# The negation is written directly: piping into the quoted string '!' is not
# a function call and fails at run time (the backticked `!` was intended).
keep(df1, ~ !is.null(attr(.x, "SpotfireColumnMetaData")$DP.UniqueId))

или discard, где мы отбрасываем те столбцы, которые не имеют атрибута

# discard() drops the columns of df1 for which the predicate is TRUE, i.e.
# those whose SpotfireColumnMetaData attribute has no DP.UniqueId entry.
discard(df1, ~ is.null(attr(.x, "SpotfireColumnMetaData")$DP.UniqueId))
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...