Я не микробиолог, но думаю, что ваша конструкция имени переменной содержит много информации.
Конструкция имени переменной:
<mic|interp><susceptibility><antibiotic>
<mic|interp>
- mic — минимальная ингибирующая концентрация
- interp — интерпретация восприимчивости
<susceptibility>
- <mr|ms|vk|px|et>
- mr — устойчивый
- ms — чувствительный
- vk — ?
- px — ?
- et — ?
<antibiotic>
Существует два подхода к проверке того, что в наборе данных присутствуют имена переменных, связанные с MIC:
- Способ № 1 — сравнить имена переменных набора данных с заранее составленным списком ожидаемых имён. Список строится на основе предварительно заданного перечня антибиотиков, ИЛИ
- Способ № 2 — разобрать имена переменных набора данных на составные части имени MIC. Вывести отчёт о переменных набора данных и о возможно отсутствующих переменных MIC.
Примеры:
Имитация набора данных MIC — создание набора данных с несколькими именами переменных MIC
/* Simulate some data.
 *
 * Builds WORK.HAVE with 1000 rows (sampleid = 1..1000).  Each row carries
 * three bookkeeping variables that are declared but never assigned
 * (instrumentid, rundate, operator -- they stay missing) followed by 25
 * numeric variables whose names follow, or deliberately almost follow, the
 * <mic|interp><susceptibility><antibiotic> naming construct.
 *
 * Changes from the earlier version:
 *   - an explicit array with an indexed DO loop replaces the implicit
 *     array + DO OVER form;
 *   - the random stream is seeded so repeated runs give the same values.
 */
data have;
  /* Fixed seed: makes the simulated values reproducible from run to run. */
  call streaminit(20240101);

  /* Declarations are kept in this order because it fixes each variable's
     position (VARNUM) in the output data set. */
  length sampleid 8;
  length instrumentid $20;
  format rundate yymmdd10.;
  length operator $10;

  /* NOTE(review): micmrMarbo and the fubar/foobar pair look like deliberate
     irregular names for exercising the validation steps -- confirm. */
  array construct_names {*}
    micmrMarbo
    interpetamk micetamk interpmramk micmramk interpmsamk micmsamk
    interppxamk micpxamk interpvkamk micvkamk
    interpetimi micetimi interpmrimi micmrimi interpmsimi micmsimi
    interppximi micpximi interpvkimi micvkimi
    interpmsfubar micmsfubar
    interppxfubar micpxfoobar
  ;

  do sampleid = 1 to 1000;
    /* Every construct variable gets a Normal(mean 50, sd 9) draw rounded
       to the nearest 0.25. */
    do _k = 1 to dim(construct_names);
      construct_names{_k} = round(rand("normal", 50, 9), 0.25);
    end;
    output;
  end;

  /* The loop index is a working variable only; keep it out of WORK.HAVE. */
  drop _k;
run;
Получить метаданные
/* Turn the variable names of WORK.HAVE into data: WORK.HAVE_NAMES receives
   one row per variable, keeping only NAME and VARNUM.  NOPRINT suppresses
   the printed CONTENTS listing. */
proc contents
    data = have
    out  = have_names (keep = name varnum)
    noprint
;
run;
Способ № 1
/* Compute the variable names expected under the MIC naming construct.
 *
 * Writes WORK.EXPECT_NAMES with one row per combination of
 *   part1 (measure prefix) x part2 (susceptibility code) x part3 (suffix),
 * i.e. 2 x 5 x 4 = 40 rows.  SEQUENCE numbers the rows 1..40 in generation
 * order (suffix outermost, measure prefix innermost); NAME is the
 * concatenation part1 || part2 || part3.
 *
 * Only names built from the listed suffixes are produced, so only those
 * can later be matched as "expected".
 */
data expect_names(keep=sequence name);
  /* Declare NAME explicitly.  A variable first assigned from CATS would
     otherwise default to length 200; 32 is the maximum length of a SAS
     variable name.  SEQUENCE is listed first to keep the column order
     sequence, name. */
  length sequence 8 name $32;

  /* load arrays with construct parts */
  array part1(2) $6 ('mic', 'interp');
  array part2(5) $2 ('mr', 'ms', 'vk', 'px', 'et');
  array part3(4) $10 ('AMK', 'IMIP', 'TOBI', 'TYPO'); /* 4 expected suffix codes */

  /* construct expected names */
  do part3_index = 1 to dim(part3);
    do part2_index = 1 to dim(part2);
      do part1_index = 1 to dim(part1);
        /* sum statement: SEQUENCE is retained and starts from 0 */
        sequence + 1;
        name = cats(part1[part1_index], part2[part2_index], part3[part3_index]);
        output;
      end;
    end;
  end;
run;
/* Way 1 data validation: compare data set variable names to expectations.
 *
 * Full-joins the actual variable names (HAVE_NAMES) with the expected
 * names (EXPECT_NAMES) on a case-insensitive name match and classifies
 * every row:
 *   - name only in EXPECT_NAMES -> expected MIC variable is absent;
 *   - name only in HAVE_NAMES   -> variable does not follow the construct;
 *   - name in both              -> OK.
 *
 * Every column reference goes through the correlation names HAVE and
 * EXPECT declared in the FROM clause; the earlier select list mixed in the
 * underlying table names.
 */
proc sql;
  create table name_comparison as
  select
    have.varnum,
    coalesce(have.name, expect.name) as name,
    case
      when have.name is null and expect.name is not null then 'Expected MIC variable was not in the data set'
      when have.name is not null and expect.name is null then 'NOT a MIC variable construct'
      else 'OK'
    end as status
  from have_names as have
  full join expect_names as expect
    on upper(have.name) eq upper(expect.name)
  /* rows with no HAVE match have a missing varnum; SEQUENCE orders them */
  order by have.varnum, expect.sequence
  ;
quit;
/* Print NAME_COMPARISON to compare.html using the PLATEAU style.
   Columns appear in the order varnum, name, status; the name column is
   rendered in a monospace font. */
ods html style=plateau file='compare.html';

proc print data=name_comparison;
  var varnum;
  var name / style=[fontfamily=monospace];
  var status;
run;

ods html close;
Отчёт представляет собой простой список, показывающий, как оценено каждое имя переменной.
Способ № 2
Разобрать имена переменных набора данных на составные части и вывести отчёт в виде таблицы с цветовой кодировкой.
/* Compute the construct parts used by the completeness check.
 *
 * PART1 holds the measure prefixes and PART2 the susceptibility codes;
 * each row has a display order, a mnemonic and a free-text meaning.
 * MIC_NAME_PREFIXES is their cross product: one row per
 * (part1, part2) pair with the concatenated PREFIX and both order
 * numbers (part1z, part2z).
 */
proc sql;

  /* --- part 1: measure prefix ------------------------------------- */
  create table part1
    ( order    num
    , mnemonic char(6)
    , meaning  char(200)
    );

  insert into part1
    values (1, 'mic', 'minimum inhibitory concentration')
    values (2, 'interp', 'susceptibility interpretation')
  ;

  /* --- part 2: susceptibility code (meanings left as placeholders) - */
  create table part2
    ( order    num
    , mnemonic char(6)
    , meaning  char(200)
    );

  insert into part2
    values (1, 'mr', '??')
    values (2, 'ms', '??')
    values (3, 'vk', '??')
    values (4, 'px', '??')
    values (5, 'et', '??')
  ;

  /* --- every part1 x part2 combination ----------------------------- */
  create table mic_name_prefixes as
    select
        p1.order    as part1z format=2.
      , p1.mnemonic as part1
      , p2.order    as part2z format=2.
      , p2.mnemonic as part2
      , cats(p1.mnemonic, p2.mnemonic) as prefix
    from part1 as p1
    cross join part2 as p2
  ;
/* ANTIBODIES: one row per distinct name suffix found in the data.
   A variable contributes when its upper-cased name starts with one of the
   upper-cased prefixes in MIC_NAME_PREFIXES; the suffix is whatever follows
   the prefix.  ABZ is the lowest VARNUM seen for the suffix, and the table
   is ordered by it. */
create table antibodies(label="Extract antibody from variable names with proper prefix") as
  select
      substr(upper(vars.name), length(pfx.prefix) + 1) as antibody
    , min(vars.varnum) as abz format=6.
  from have_names as vars
  inner join mic_name_prefixes as pfx
    on upper(vars.name) like cats(upper(pfx.prefix), '%')
  group by antibody
  order by abz
;
/* NAME_GRID_DATA: the complete prefix x suffix grid compared with the
   actual variable names.

   - The inline view CELL cross-joins MIC_NAME_PREFIXES with ANTIBODIES and
     builds the name each grid cell should have.
   - A full join with HAVE_NAMES (case-insensitive on the name) keeps both
     grid cells that have no variable and variables that fit no grid cell.
   - VARNAME is the grid name when there is one, otherwise the actual name.
   - EXPECTED_FOUND is 1 when a HAVE_NAMES row took part in the match.

   Rows are ordered by ABZ, then part2z, then part1z; rows without a grid
   cell have no ABZ and sort by varnum + 1e6 instead. */
create table name_grid_data as
  select
      cell.abz
    , cell.part1z
    , cell.part2z
    , cell.part1
    , cell.part2
    , cell.antibody
    , coalesce(cell.name, ds.name) as varname length=32
    , not missing(ds.name) as expected_found format=1.
  from
    ( select pfx.*, abx.*, cats(pfx.part1, pfx.part2, abx.antibody) as name
        from mic_name_prefixes as pfx
        cross join antibodies as abx
    ) as cell
    full join have_names as ds
      on upper(cell.name) = upper(ds.name)
  order by
    coalesce(cell.abz, ds.varnum + 1e6), cell.part2z, cell.part1z
;
/* Collect the column counts that the report step needs as macro variables. */

/* From here on the SELECT statements only load macro variables. */
reset noprint;

/* ABMISSING: 1 when at least one row has a missing antibody, otherwise 0. */
select count(distinct 0)
  into :abmissing trimmed
  from name_grid_data
  where missing(antibody);

/* ABCOUNT: number of distinct non-missing antibody values ... */
select count(distinct antibody)
  into :abcount trimmed
  from name_grid_data;

/* ... plus one extra slot when the missing group exists. */
%let abcount = %eval(&abcount + &abmissing);
%put NOTE: &=abcount;
%macro cols (from,to);
  /* Emits the list _c<from>_ _c<from+1>_ ... _c<to>_ as plain text.
     Needed to build the ARRAY statements in the PROC REPORT compute block.
     Emits nothing when from is greater than to. */
  %local colnum;
  %let colnum = &from;
  %do %while (&colnum <= &to);
    _c&colnum._
    %let colnum = %eval(&colnum + 1);
  %end;
%mend;
/* Grid report of NAME_GRID_DATA written to mic_names.html.
   Rows are the part1/part2 groups and columns are the antibody values.
   Each cell shows the variable name and is colored by whether
   expected_found is true for that cell.
   Relies on macro variables abcount and abmissing and on macro %cols,
   all set up earlier in this file. */
ods html file = 'mic_names.html';
/* MISSING keeps rows whose group/across values are missing in the report */
proc report data=name_grid_data spanrows missing;
/* Column layout, left to right:
     1-2                      part1, part2
     3 .. 2+abcount           varname under each antibody value
     3+abcount .. 2+2*abcount expected_found under each antibody value (alias ab)
     last                     placeholder */
column
part1 part2
antibody,varname /* 'display var under across var' trick, display will be shown */
antibody=ab,expected_found /* same trick with ab alias, to get _c#_ column for compute block logic */
placeholder
;
define part1 / group order=data ' ' style=header;
define part2 / group order=data ' ' style=header;
define antibody / across order=data ' ';
define ab / across order=data ' ' noprint; /* NOPRINT, _c#_ available, but not rendered */
define varname / ' ' style=[fontfamily=monospace];
define placeholder / noprint; /* required for 'display under across' trick */
/* right most column has access to all leftward columns */
compute placeholder;
/* name_col(i) is the varname cell and have_col(i) the expected_found cell
   of the i-th antibody column; both arrays have abcount elements */
array name_col %cols(3, %eval(2+&abcount)); /* array for _c#_ columns */
array have_col %cols(%eval(3+&abcount), %eval(2+2*&abcount)); /* array for _c#_ columns */
/* conditionally highlight the missing variables */
/* The loop stops abmissing columns short of the array size.
   NOTE(review): presumably this leaves the missing-antibody column
   unstyled, which relies on that column being last -- confirm. */
do index = 1 to &abcount - &abmissing;
if not missing ( name_col(index) ) then do;
/* lightred: cell has a name but expected_found is false; lightgreen: found */
if not have_col(index) then
call define (vname(name_col(index)), 'style', 'style=[background=lightred]');
else
call define (vname(name_col(index)), 'style', 'style=[background=lightgreen]');
end;
end;
endcomp;
run;
ods html close;
Отчёт в виде таблицы с цветовой кодировкой