После установки mageck_nest из исходного кода с GitHub на Ubuntu 18.04 я получаю следующую ошибку:
(mageck_nest_env) joshua@joshua-X551CAP:~/mageck_nest$ mageck_nest nest -n HSJDPthalaFluteRep -k /home/joshua/CRISPRJan2020/workflowHSJDPthalaRepFlute/results/count/all.count.txt -d /home/joshua/CRISPRJan2020/Jan_2020_DMs/HSJD_Rep_Pthala_Flute_Day21_DM --norm-method total -o
Traceback (most recent call last):
File "/home/joshua/miniconda3/envs/mageck_nest_env/bin/mageck_nest", line 21, in <module>
mageck_nest_main()
File "/home/joshua/miniconda3/envs/mageck_nest_env/bin/mageck_nest", line 8, in mageck_nest_main
args=postargs(initial_args)
File "/home/joshua/mageck_nest/mageck_nest/argParser.py", line 59, in postargs
logging.basicConfig(level=10,
NameError: name 'logging' is not defined
Насколько я знаю, модуль logging должен импортироваться вместе с 'sys', поскольку он является частью стандартной библиотеки Python 3.7.
Сначала я подумал, что, возможно, дело в том, что у меня установлены две разные версии Python: Python 3.6, входящая в состав самой Ubuntu, и Python 3.7, установленная через conda. Чтобы обойти это, я создал новую среду conda (mageck_nest_env) для запуска MAGeCK_NEST и установил в ней Python 3.7.6 в качестве интерпретатора.
Вот скрипт для самого модуля:
#!/usr/bin/env python3
'''
MAGeCK nest main entry
'''
import copy
import logging
import math
import operator
import os
import pickle
import random
import sys
from collections import defaultdict

import numpy as np

from mageck_nest.mleinstanceio import *
from mageck_nest.mleem import iteratenbem
from mageck_nest.mlemeanvar import MeanVarModel
from mageck_nest.mageckCount import normalizeCounts
from mageck_nest.bayes_selection import *
from mageck_nest.dispersion_characterization import *
from mageck_nest.mleargparse import *
from mageck_nest.mageck_nest_PPI import *
from mageck_nest.mageck_nest_output import *
from mageck_nest.mleclassdef import *
from mageck_nest.outliers_candidates import *
class Mageck_nest():
    '''
    State and pipeline stages of one MAGeCK-NEST run.

    The stages are separate methods (nest_init, nest_fitting, nest_basic,
    constant_optimization, nest_iteration); presumably the command-line entry
    point calls them in that order. nest_basic and constant_optimization
    pickle the whole object into the output directory, and
    constant_optimization / nest_iteration begin by loading such a pickle, so
    the late stages hand their state over through those checkpoint files.
    '''

    def __init__(self, options):
        '''
        Copy the run settings from ``options`` and create the output folders.

        ``options`` is any object exposing the attributes read below
        (count_table, design_matrix, beta_labels, include_samples,
        output_prefix, adjust_method, genes_varmodeling, negative_control,
        norm_method, outliers_removal, PPI_prior, QC_metric).

        Side effects: creates ``<cwd>/<output_prefix>`` and a ``QC_folder``
        directory inside it if they do not exist yet.
        '''
        # Required
        self.count_table=options.count_table
        self.design_matrix=options.design_matrix
        # IO related
        self.beta_labels=options.beta_labels
        self.include_samples=options.include_samples
        self.output_prefix=options.output_prefix
        self.file_directory=os.getcwd()
        self.output_directory="%s/%s" %(self.file_directory,self.output_prefix)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(self.output_directory,exist_ok=True)
        # Normalization related
        self.adjust_method=options.adjust_method
        self.genes_varmodeling=options.genes_varmodeling
        self.negative_control=options.negative_control
        self.negative_control_gRNAs=None
        self.norm_method=options.norm_method
        # PPI and outliers removal
        self.outliers_removal=options.outliers_removal
        self.PPI_prior=options.PPI_prior
        self.QC_metric=options.QC_metric
        # output_directory is already absolute; join instead of prefixing "/"
        # so the path does not start with a double slash.
        self.QC_path=os.path.join(self.output_directory,"QC_folder")
        os.makedirs(self.QC_path,exist_ok=True)
        self.selection_constant=None
        self.PPI_diagnosis=None
        self.non_PPI_beta_prior_variance=None
        self.PPI_weighting=1
        self.suggested_remove_sgRNA=[]
        # Others
        self.allgenedict=None
        self.size_f=None
        self.mrm=None
        self.log_list=defaultdict(list)

    def _is_negative_control(self,geneid):
        '''Return True if ``geneid`` is in ``self.negative_control``; None means no controls.'''
        return self.negative_control is not None and geneid in self.negative_control

    def _checkpoint_path(self,stage):
        '''Return the pickle path ``<output_directory>/<output_prefix>_self_<stage>.p``.'''
        return os.path.join(self.output_directory,"%s_self_%s.p" %(self.output_prefix,stage))

    def nest_init(self):
        '''
        Read the count table, compute size factors and run a first fit.

        Fills ``allgenedict``, ``invalid_gRNA_dict`` (and
        ``negative_control_gRNAs`` when negative controls were given),
        ``size_f`` and ``non_PPI_beta_prior_variance``, then calls the PPI
        import / weighting helpers on this object.
        '''
        logging.info('Initiating ...')
        # read_gene_from_file returns two values without negative controls
        # and three values with them.
        if self.negative_control is None:
            self.allgenedict,self.invalid_gRNA_dict=read_gene_from_file(self.count_table,includesamples=self.include_samples,negative_control=self.negative_control)
        else:
            self.allgenedict,self.invalid_gRNA_dict,self.negative_control_gRNAs=read_gene_from_file(self.count_table,includesamples=self.include_samples,negative_control=self.negative_control)
        # calculate the size factor: flatten the per-gene count matrices into
        # one {sgRNA id: list of counts} table
        cttab_sel={}
        for gk in self.allgenedict.values():
            sgreadmat=gk.nb_count.getT().tolist()
            for i,sgid in enumerate(gk.sgrnaid):
                cttab_sel[sgid]=sgreadmat[i]
        if hasattr(self,'norm_method'):
            if self.norm_method!='none':
                self.size_f=normalizeCounts(cttab_sel,method=self.norm_method,returnfactor=True,reversefactor=True,negative_control_gRNAs=self.negative_control_gRNAs)
            else:
                self.size_f=None
        else:
            self.size_f=normalizeCounts(cttab_sel,returnfactor=True,reversefactor=True)
        # size_f is None when normalization is switched off ('none');
        # joining over None would raise TypeError.
        if self.size_f is not None:
            logging.info('Size factor: '+','.join([str(x) for x in self.size_f]))
        desmat=self.design_matrix
        #------------------------------
        for (tgid,tginst) in self.allgenedict.items():
            if not self._is_negative_control(tgid):
                tginst.design_mat=desmat
                iteratenbem(tginst,debug=False,estimateeff=False,alpha_val=0.05,size_factor=self.size_f)
                tginst.w_estimate=[]
        self.non_PPI_beta_prior_variance=beta_non_PPI_prior_calculation(self.allgenedict,self.negative_control)
        PPI_import_string_9(self)
        PPI_weighting_rewiring(self)

    def nest_fitting(self):
        '''
        Estimate dispersions on up to 2000 genes and fit the mean-variance model.

        Stores the fitted ``MeanVarModel`` in ``self.mrm``.
        '''
        logging.info('Estimating dispersion factors ...')
        ngenes=0
        for (tgid,tginst) in self.allgenedict.items():
            if not self._is_negative_control(tgid):
                if ngenes<2000:
                    try:
                        sgrna_wide_dispersion_estimation_MAP_v2(tginst,self.design_matrix)
                        ngenes+=1
                    except Exception as err:
                        # Best effort: a gene whose estimation fails is
                        # skipped and not counted. Exception (not a bare
                        # except) lets Ctrl-C and SystemExit through.
                        logging.debug('Dispersion estimation skipped for %s: %s',tgid,err)
        logging.info('Modeling the mean and variance ...')
        self.mrm=MeanVarModel()
        self.mrm.model_mean_disp_by_glm(self.allgenedict,self.output_prefix,self.size_f)

    def nest_basic(self):
        '''
        Compute beta scores per gene, trying the removal of suggested outlier sgRNAs.

        Side effects: updates the gene instances in ``allgenedict`` (possibly
        replacing an entry with a copy), may set ``self.outliers_removal`` to
        True, pickles this object to the "nest_major" checkpoint and calls
        ``nest_output``.
        '''
        logging.info('Calculating beta scores ...')
        self.suggested_remove_sgRNA=removal_suggestion(self)
        for (tgid,tginst) in self.allgenedict.items():
            if not self._is_negative_control(tgid):
                n_beta1=tginst.design_mat.shape[1]-1
                candidate_removed_tginst=[i for i in tginst.sgrnaid if i in self.suggested_remove_sgRNA]
                iteratenbem(tginst,debug=False,meanvarmodel=self.mrm,restart=True,size_factor=self.size_f,beta1_prior_var=self.non_PPI_beta_prior_variance)
                temp_non_PPI_beta_prior_variance=self.non_PPI_beta_prior_variance
                # Large mean |beta|: refit once with the prior variance halved.
                if abs(np.mean(tginst.beta_estimate[-n_beta1:]))>5:
                    temp_non_PPI_beta_prior_variance=[v/2 for v in temp_non_PPI_beta_prior_variance]
                    iteratenbem(tginst,debug=False,meanvarmodel=self.mrm,restart=True,size_factor=self.size_f,beta1_prior_var=temp_non_PPI_beta_prior_variance)
                if len(candidate_removed_tginst)>0 and len(tginst.sgrnaid)<30 and len(tginst.sgrnaid)>4 and abs(tginst.beta_estimate[-1])>5:
                    outliers_index=[orders for orders,sgRNA in enumerate(tginst.sgrnaid) if sgRNA in candidate_removed_tginst]
                    ratio_record=[]
                    tginst_record=[]
                    outliers_index_record=[]
                    # Snapshot of the fit before any candidate is masked.
                    # NOTE(review): copy.copy is shallow; attributes that
                    # iteratenbem mutates in place would be shared - confirm.
                    tginst_2=copy.copy(tginst)
                    temp_2=tginst_2.beta_estimate[-n_beta1:]
                    # Refit with each candidate sgRNA masked out in turn.
                    for i in outliers_index:
                        # NOTE(review): this overwrites the user-supplied
                        # outliers_removal option for the rest of the run.
                        self.outliers_removal=True
                        tginst.eff_estimate=[1]*len(tginst.sgrnaid)
                        tginst.eff_estimate[i]=0
                        iteratenbem(tginst,debug=False,meanvarmodel=self.mrm,restart=True,removeoutliers=self.outliers_removal,size_factor=self.size_f,beta1_prior_var=self.non_PPI_beta_prior_variance)
                        temp_non_PPI_beta_prior_variance=self.non_PPI_beta_prior_variance
                        if abs(np.mean(tginst.beta_estimate[-n_beta1:]))>5:
                            temp_non_PPI_beta_prior_variance=[v/2 for v in temp_non_PPI_beta_prior_variance]
                            iteratenbem(tginst,debug=False,meanvarmodel=self.mrm,restart=True,removeoutliers=self.outliers_removal,size_factor=self.size_f,beta1_prior_var=temp_non_PPI_beta_prior_variance)
                        tginst_1=copy.copy(tginst)
                        temp_1=tginst_1.beta_estimate[-n_beta1:]
                        # log of |mean beta before masking / mean beta after masking|
                        ratio=np.log(abs(np.mean(temp_2)/np.mean(temp_1)))
                        ratio_record.append(abs(np.mean(temp_1)))
                        outliers_index_record.append(i)
                        # Keep the masked fit only if it changed beta enough;
                        # the threshold shrinks as the gene has more sgRNAs.
                        if ratio>(5-0.2*len(tginst.sgrnaid)):
                            tginst_record.append(tginst_1)
                        else:
                            tginst_record.append(tginst_2)
                    # Rank candidates by |mean beta| after masking, smallest first.
                    ratio_record=[[i,j] for i,j in enumerate(ratio_record)]
                    ratio_record.sort(key=operator.itemgetter(1))
                    if ratio_record[0][1]>5 and len(ratio_record)>=2:
                        # Even the best single removal leaves |beta| > 5:
                        # mask the two best-ranked candidates together.
                        logging.info(ratio_record[0][1])
                        two_outliers=[outliers_index_record[ratio_record[0][0]],outliers_index_record[ratio_record[1][0]]]
                        tginst.eff_estimate=[1]*len(tginst.sgrnaid)
                        for i in two_outliers:
                            tginst.eff_estimate[i]=0
                        iteratenbem(tginst,debug=False,meanvarmodel=self.mrm,restart=True,removeoutliers=self.outliers_removal,size_factor=self.size_f,beta1_prior_var=self.non_PPI_beta_prior_variance)
                    else:
                        # Replacing the value of an existing key is safe
                        # while iterating over items().
                        self.allgenedict[tgid]=tginst_record[ratio_record[0][0]]
        for (tgid,tginst) in self.allgenedict.items():
            if not self._is_negative_control(tgid):
                if len(tginst.w_estimate)==0:
                    tginst.w_estimate=np.ones(len(tginst.sgrnaid))
        # Checkpoint read back by constant_optimization.
        with open(self._checkpoint_path("nest_major"),'wb') as handle:
            pickle.dump(self,handle)
        nest_output(self,["False","False"])

    def constant_optimization(self):
        '''
        Validate the PPI prior and estimate the selection constant.

        Works on the object loaded from the "nest_major" checkpoint and saves
        the result to the "constant_optimization" checkpoint.

        NOTE(review): ``self`` is rebound to the unpickled object, so the
        instance this method was called on is NOT updated; results are only
        available through the written checkpoint.
        '''
        # The pickle is this program's own checkpoint; never point this at a
        # file from an untrusted source (unpickling can execute code).
        with open(self._checkpoint_path("nest_major"),'rb') as handle:
            self=pickle.load(handle)
        if self.PPI_prior==True:
            logging.info('PPI validation...')
            PPI_main(self)
            if self.PPI_diagnosis==True:
                beta_prior_output(self)
            if self.PPI_diagnosis==False:
                logging.info("The overlapped number of input genes and PPI genes is less than <3000.")
                logging.info("PPI is not recommended.")
        if self.outliers_removal==True:
            logging.info('Estimate selection constant...')
            outliers_number_ratio=total_known_outliers_number(self)
            if outliers_number_ratio>0.05:
                # The original line was the bare function name, which never
                # ran the optimisation and left selection_constant unset.
                # NOTE(review): the argument is assumed to mirror
                # bayes_selection_constnat_optimization(self) - confirm.
                nest_selection_constnat_optimization(self)
                logging.info("Selection constant: %s" %self.selection_constant)
                for (tgid,tginst) in list(self.allgenedict.items()):
                    if not self._is_negative_control(tgid):
                        nest_selection(tginst,log_list=self.log_list,selection_constant=self.selection_constant)
            else:
                bayes_selection_constnat_optimization(self)
                logging.info("Selection constant: %s" %self.selection_constant)
                for (tgid,tginst) in list(self.allgenedict.items()):
                    if not self._is_negative_control(tgid):
                        bayes_selection(tginst,log_list=self.log_list,selection_constant=self.selection_constant)
        with open(self._checkpoint_path("constant_optimization"),'wb') as handle:
            pickle.dump(self,handle)

    def nest_iteration(self):
        '''
        Refit and write output for each enabled PPI / outlier-removal combination.

        Every combination starts from a fresh copy of the
        "constant_optimization" checkpoint. Does nothing beyond changing
        directory when neither PPI_prior nor outliers_removal is enabled.

        Side effect: changes the process working directory to
        ``output_directory``.
        '''
        logging.info('Final iteratioin for PPI or outliers removal...')
        os.chdir(self.output_directory)
        # Each mark is [PPI_prior, outliers_removal]. The empty default covers
        # the case where both options are off, which previously left `marks`
        # unbound and raised UnboundLocalError.
        marks=[]
        if self.PPI_prior==False and self.outliers_removal==True:
            marks=[[False,True]]
        elif self.PPI_prior==True and self.outliers_removal==True:
            marks=[[False,True],[True,False],[True,True]]
        elif self.PPI_prior==True and self.outliers_removal==False:
            marks=[[True,False]]
        checkpoint=self._checkpoint_path("constant_optimization")
        for mark in marks:
            # Reload so one combination's refit cannot leak into the next.
            with open(checkpoint,'rb') as handle:
                self=pickle.load(handle)
            self.PPI_prior=mark[0]
            self.outliers_removal=mark[1]
            for (tgid,tginst) in self.allgenedict.items():
                if not self._is_negative_control(tgid):
                    iteratenbem(tginst,debug=False,meanvarmodel=self.mrm,restart=False,PPI_prior=self.PPI_prior,removeoutliers=self.outliers_removal,size_factor=self.size_f,beta1_prior_var=self.non_PPI_beta_prior_variance)
            nest_output(self,[str(mark[0]),str(mark[1])])
Я убедился, что все используемые каталоги находятся в $PYTHONPATH, поэтому не думаю, что проблема в этом. Я ищу ответ уже больше недели и не могу найти способ решить эту проблему; однако я новичок в Python, поэтому прошу прощения, если она решается легко.