C++: слишком много ошибок «Invalid write / read of size 4» в valgrind - PullRequest
2 голосов
/ 15 февраля 2012

Я пишу программу на C++, которая анализирует большое количество текстовых файлов. При запуске программы под valgrind я получаю множество ошибок.

Я использую Ubuntu 11 и компилятор g++. Программа принимает в качестве аргументов необязательный справочный файл, основной рабочий файл, содержащий более 10 миллионов строк, и корневое имя для 14 создаваемых файлов.

Начну с первой основной проблемы: из-за странного поведения программы (иногда она работает, иногда нет, в зависимости от файлов и строк, передаваемых в качестве аргументов, и т. д.) я решил проверить её с помощью valgrind. Он выдаёт огромное количество ошибок работы с памятью, начиная с переменной argv.

    #include <ctype.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "string.h"

    #include <fstream>
    #include <iostream>
    #include <sstream>
    #include <string>

    #include "coverSamMulti.h"

    using namespace std;

    int main(int argc, char **argv) {
bool mutYN;
char * filerefgen; // The reference genome files
char * filemap; // the mapping file
char * fileroot; // the root name of all the generated files

// two or three arguments?
if (argc == 3)
    {
        mutYN= false;
        filemap = new char[strlen(argv[1])+1];
        fileroot = new char[strlen(argv[2])+1];
        strcpy(filemap,argv[1]);
        strcpy(fileroot,argv[2]);
    }
    else if (argc == 4)
    {
        mutYN= true;
        filerefgen = new char[strlen(argv[1])+1];
        filemap = new char[strlen(argv[2])+1];
        fileroot = new char[strlen(argv[3])+1];
        strcpy(filerefgen,argv[1]);
        strcpy(filemap,argv[2]);
        strcpy(fileroot,argv[3]);
    }
    else {
        cout << " \n Less or or too much parameter passed \n";
        return 0;
    }


    //call of the main function

    GenF_Inf genfinf = ScanReadsMap(filemap);

    // We will store for each positions and in 14 files respectively the coverage, the coverage of A, G, C and T,
    // the number mismatches,the number of first position alignment, the number of first position alignment on the reverse strand,
    // and the consensus genome of the sample, the segments size, the insertfs, the firsts, the ins and the del,



                char * filecov = (char *)malloc(sizeof(char)*(strlen(fileroot)+10));
                char * filecov_A = (char *)malloc(sizeof(char)*(strlen(fileroot)+12));
                char * filecov_G = (char *)malloc(sizeof(char)*(strlen(fileroot)+12));
                char * filecov_C = (char *)malloc(sizeof(char)*(strlen(fileroot)+12));
                char * filecov_T = (char *)malloc(sizeof(char)*(strlen(fileroot)+12));
                char * filemis= (char *)malloc(sizeof(char)*(strlen(fileroot)+10));
                char * filefirst= (char *)malloc(sizeof(char)*(strlen(fileroot)+10));
                char * filefirstr = (char *)malloc(sizeof(char)*(strlen(fileroot)+9));
                char * filecons = (char *)malloc(sizeof(char)*(strlen(fileroot)+9));
                char * fileseg = (char *)malloc(sizeof(char)*(strlen(fileroot)+9));
                char * fileinsert = (char *)malloc(sizeof(char)*(strlen(fileroot)+12)); // the different insertsize
                char * filefirsts = (char *)malloc(sizeof(char)*(strlen(fileroot)+12)); // the first position  on single mapped reads
                char * fileins = (char *)malloc(sizeof(char)*(strlen(fileroot)+9)); // the different insertsize
                char * filedel = (char *)malloc(sizeof(char)*(strlen(fileroot)+9)); // the first position  on single mapped reads

                //building files' name;

                strcpy(filecov,fileroot);
                strcpy(filecov_A,fileroot);
                strcpy(filecov_C,fileroot);
                strcpy(filecov_G,fileroot);
                strcpy(filecov_T,fileroot);
                strcpy(filemis,fileroot);
                strcpy(filefirst,fileroot);
                strcpy(filefirstr,fileroot);
                strcpy(filecons,fileroot);
                strcpy(fileseg,fileroot);
                strcpy(fileinsert,fileroot);
                strcpy(filefirsts,fileroot);
                strcpy(fileins,fileroot);
                strcpy(filedel,fileroot);

                strcat(filecov, "_cov.txt");
                strcat(filecov_A, "_cov_A.txt");
                strcat(filecov_C, "_cov_C.txt");
                strcat(filecov_G, "_cov_G.txt");
                strcat(filecov_T, "_cov_T.txt");
                strcat(filemis, "_mis.txt");
                strcat(filefirst, "_first.txt");
                strcat(filefirstr, "_firstr.txt");
                strcat(filecons, "_cons.txt");
                strcat(fileseg, "_seg.txt");
                strcat(fileinsert, "_insert.txt");
                strcat(filefirsts, "_firsts.txt");
                strcat(fileins, "_ins.txt");
                strcat(filedel, "_del.txt");

                ofstream covf(filecov);
                ofstream covf_A(filecov_A);
                ofstream covf_C(filecov_C);
                ofstream covf_G(filecov_G);
                ofstream covf_T(filecov_T);
                ofstream misf(filemis);
                ofstream firstf(filefirst);
                ofstream firstrevf(filefirstr);
                ofstream consf(filecons);
                ofstream segf(fileseg);
                ofstream insertf(fileinsert);
                ofstream firstsf(filefirsts);
                ofstream insf(fileins);
                ofstream delf(filedel);


                // generating the files

cout << "\n ====================================== \n ";
cout << "\n Generating the files   \n ";
cout << "\n ====================================== \n ";



for(int j=0;j < NSEG; j++)
    {   segf << genfinf.start[j];
            for ( int i =0; i <genfinf.lenght[j]; i++)
            { if ((j!=NSEG-1) && (i!= genfinf.lenght[NSEG-1]-1)){
                firstf << genfinf.mapfinf[j].firstposcov[i] << ",";
                misf << genfinf.mapfinf[j].nbmismatch[i]<< ",";
                covf << genfinf.mapfinf[j].poscov[i] << ",";
                covf_A << genfinf.mapfinf[j].poscov_A[i]<< ",";
                covf_C << genfinf.mapfinf[j].poscov_C[i]<< ",";
                covf_G << genfinf.mapfinf[j].poscov_G[i]<< ",";
                covf_T << genfinf.mapfinf[j].poscov_T[i]<< ",";
                firstrevf<< genfinf.mapfinf[j].first_rev[i]<< ",";
                insf << genfinf.mapfinf[j].ins[i]<< ",";
                delf << genfinf.mapfinf[j].del[i]<< ",";
                firstsf << genfinf.mapfinf[j].firstposcovsingle[i]<< ",";
            }}
    }


for(int j=0;j < 999; j++)   insertf << genfinf.insert[j]<< ",";

int j = NSEG-1;int i = genfinf.lenght[NSEG-1]-1;

firstf << genfinf.mapfinf[j].firstposcov[i] ;
misf << genfinf.mapfinf[j].nbmismatch[i];
covf << genfinf.mapfinf[j].poscov[i] ;
covf_A << genfinf.mapfinf[j].poscov_A[i];
covf_C << genfinf.mapfinf[j].poscov_C[i];
covf_G << genfinf.mapfinf[j].poscov_G[i];
covf_T << genfinf.mapfinf[j].poscov_T[i];
firstrevf<< genfinf.mapfinf[j].first_rev[i];
insf << genfinf.mapfinf[j].ins[i];
delf << genfinf.mapfinf[j].del[i];
firstsf << genfinf.mapfinf[j].firstposcovsingle[i];

j =999;
insertf << genfinf.insert[j];



//building the consensus genome

cout << "\n ====================================== \n ";
cout << "\n building the consensus  \n ";
cout << "\n ====================================== \n ";



int A,G,C,T;

for(int j=0;j < NSEG; j++)
        {  consf<<">segment_"<<j+1<<"\n";
            int k = 0; // for newline after 60 bp;
                for ( i =0; i <genfinf.lenght[j]; i++)
                {
                    A=genfinf.mapfinf[j].poscov_A[i]; C=genfinf.mapfinf[j].poscov_C[i];
                    G=genfinf.mapfinf[j].poscov_G[i]; T=genfinf.mapfinf[j].poscov_T[i];
                    if    ((A>=G)&&(A>=C)&&(A>=T))   {if (A!=0)consf<<"A"; else         consf<<"N";}
                    else if ((C>=A)&&(C>=G)&&(C>=T))  consf<<"C";
                    else if ((G>=A)&&(G>=C)&&(G>=T))  consf<<"G";
                    else if (((T>=A)&&(T>=G)&&(T>=C)))consf<<"T";
                    k++;
                    if (k ==59 ){consf<<"\n";k=0;}
                }
                consf<<"\n";
        }


    // closing all files

covf.close();
covf_A.close();
covf_C.close();
covf_G.close();
covf_T.close();
misf.close();
firstf.close();
firstrevf.close();
consf.close();
segf.close();
insertf.close();
firstsf.close();
insf.close();
delf.close();



// finding mutations


 if (mutYN)
 {

        cout << "\n ====================================== \n ";
        cout << "\n  computing mutations \n ";
        cout << "\n ====================================== \n ";


        // Preparing the mutations file.
        char * filemut = (char *)malloc(sizeof(char)*(strlen(fileroot)+9));
        strcpy(filemut,fileroot);
        strcat(filemut, "_mut.txt");
        ofstream mutf(filemut);

        char chrg, chcg;// Char from the reference genome, Char from the consensus genome
        string strheader; // the first line of the reference genome file

        ifstream refgen(filerefgen);
        ifstream consf(filecons);
           if ( !refgen )
                        {
                               puts("Cannot open open the file") ;
                               refgen.close() ;
                               exit(0);
                        }
           else
                     {
               getline(refgen, strheader);
               // read in the file without catching any non alpha caracter like space, tab, etc...
               do {chrg = refgen.get();} while (!isalpha(chrg) && (chrg !=EOF ));
               do {chcg=consf.get(); } while (!isalpha(chcg)&& (chcg !=EOF ));
               int i =0;

                      while (( chrg !=EOF )&&(chcg !=EOF ))
                        { if (chrg != chcg)
                        {
                            mutf<< "Position "<< i << " : \n";
                            mutf << "\t"<<chrg<<" by "<<chcg<<"\n";
                        }
                       do {chrg = refgen.get();} while (!isalpha(chrg) && (chrg !=EOF ));
                       do {chcg=consf.get(); } while (!isalpha(chcg)&& (chcg !=EOF ));
                       i++;
                        }
                     }

            cout << "\n Mutations computed \n ";
           mutf.close();

    }

    cout << "\n ====================================== \n ";
    cout << "\n all files generated successfuly \n ";
    cout << "\n ====================================== \n ";


        return 0;

}

И только небольшая часть вывода valgrind, первые строки:

     ==28950== Memcheck, a memory error detector
     ==28950== Copyright (C) 2002-2011, and GNU GPL'd, by Julian Seward et al.
     ==28950== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info
     ==28950== Command: ./echantillon Ftooshort.sam covsam/echan1
     ==28950== Parent PID: 1928
     ==28950== 
     ==28950== Warning: client switching stacks?  SP change: 0xbeee91f8 --> 0xbeb8b910
     ==28950==          to suppress, use: --max-stackframe=3528936 or greater
     ==28950== Invalid write of size 4
     ==28950==    at 0x8049000: main (echantillon.cpp:11)
     ==28950==  Location 0xbeb8b92c is 0 bytes inside local var "argv"
     ==28950==  declared at echantillon.cpp:11, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x804900D: main (echantillon.cpp:11)
     ==28950==  Address 0xbeee91ec is on thread 1's stack
     ==28950== 
     ==28950== Invalid write of size 1
     ==28950==    at 0x804901B: main (echantillon.cpp:20)
     ==28950==  Location 0xbeee7f8d is 0 bytes inside local var "mutYN"
     ==28950==  declared at echantillon.cpp:12, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x8049022: main (echantillon.cpp:21)
     ==28950==  Location 0xbeb8b92c is 0 bytes inside local var "argv"
     ==28950==  declared at echantillon.cpp:11, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x804902D: main (echantillon.cpp:21)
     ==28950==  Address 0xbeb8b910 is on thread 1's stack
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x402A225: strlen (mc_replace_strmem.c:390)
     ==28950==    by 0x8049034: main (echantillon.cpp:21)
     ==28950==  Location 0xbeb8b910 is 0 bytes inside local var "str"
     ==28950==  declared at mc_replace_strmem.c:390, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x8049038: main (echantillon.cpp:21)
     ==28950==  Address 0xbeb8b910 is on thread 1's stack
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x4029149: operator new[](unsigned int) (vg_replace_malloc.c:343)
     ==28950==    by 0x804903F: main (echantillon.cpp:21)
     ==28950==  Location 0xbeb8b910 is 0 bytes inside local var "n"
     ==28950==  declared at vg_replace_malloc.c:343, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x8049040: main (echantillon.cpp:21)
     ==28950==  Location 0xbeee7f18 is 0 bytes inside local var "filemap"
     ==28950==  declared at echantillon.cpp:14, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x8049046: main (echantillon.cpp:22)
     ==28950==  Location 0xbeb8b92c is 0 bytes inside local var "argv"
     ==28950==  declared at echantillon.cpp:11, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x8049051: main (echantillon.cpp:22)
     ==28950==  Address 0xbeb8b910 is on thread 1's stack
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x402A225: strlen (mc_replace_strmem.c:390)
     ==28950==    by 0x8049058: main (echantillon.cpp:22)
     ==28950==  Location 0xbeb8b910 is 0 bytes inside local var "str"
     ==28950==  declared at mc_replace_strmem.c:390, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x804905C: main (echantillon.cpp:22)
     ==28950==  Address 0xbeb8b910 is on thread 1's stack
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x4029149: operator new[](unsigned int) (vg_replace_malloc.c:343)
     ==28950==    by 0x8049063: main (echantillon.cpp:22)
     ==28950==  Location 0xbeb8b910 is 0 bytes inside local var "n"
     ==28950==  declared at vg_replace_malloc.c:343, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x8049064: main (echantillon.cpp:22)
     ==28950==  Location 0xbeee7f1c is 0 bytes inside local var "fileroot"
     ==28950==  declared at echantillon.cpp:15, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x804906A: main (echantillon.cpp:23)
     ==28950==  Location 0xbeb8b92c is 0 bytes inside local var "argv"
     ==28950==  declared at echantillon.cpp:11, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x8049075: main (echantillon.cpp:23)
     ==28950==  Address 0xbeb8b914 is on thread 1's stack
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x8049079: main (echantillon.cpp:23)
     ==28950==  Location 0xbeee7f18 is 0 bytes inside local var "filemap"
     ==28950==  declared at echantillon.cpp:14, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x804907F: main (echantillon.cpp:23)
     ==28950==  Address 0xbeb8b910 is on thread 1's stack
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x402A269: strcpy (mc_replace_strmem.c:429)
     ==28950==    by 0x8049086: main (echantillon.cpp:23)
     ==28950==  Location 0xbeb8b914 is 0 bytes inside local var "src"
     ==28950==  declared at mc_replace_strmem.c:429, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x402A26C: strcpy (mc_replace_strmem.c:429)
     ==28950==    by 0x8049086: main (echantillon.cpp:23)
     ==28950==  Location 0xbeb8b910 is 0 bytes inside local var "dst"
     ==28950==  declared at mc_replace_strmem.c:429, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x8049087: main (echantillon.cpp:24)
     ==28950==  Location 0xbeb8b92c is 0 bytes inside local var "argv"
     ==28950==  declared at echantillon.cpp:11, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x8049092: main (echantillon.cpp:24)
     ==28950==  Address 0xbeb8b914 is on thread 1's stack
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x8049096: main (echantillon.cpp:24)
     ==28950==  Location 0xbeee7f1c is 0 bytes inside local var "fileroot"
     ==28950==  declared at echantillon.cpp:15, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x804909C: main (echantillon.cpp:24)
     ==28950==  Address 0xbeb8b910 is on thread 1's stack
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x402A269: strcpy (mc_replace_strmem.c:429)
     ==28950==    by 0x80490A3: main (echantillon.cpp:24)
     ==28950==  Location 0xbeb8b914 is 0 bytes inside local var "src"
     ==28950==  declared at mc_replace_strmem.c:429, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x402A26C: strcpy (mc_replace_strmem.c:429)
     ==28950==    by 0x80490A3: main (echantillon.cpp:24)
     ==28950==  Location 0xbeb8b910 is 0 bytes inside local var "dst"
     ==28950==  declared at mc_replace_strmem.c:429, in frame #0 of thread 1
     ==28950== 

     // And many others almost 30 per each mentioning char * or file I mean fileroot, filemap, filecons, filedes, and so on.

     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x804D29C: std::operator|(std::_Ios_Openmode, std::_Ios_Openmode) (ios_base.h:122)
     ==28950==    by 0x8049867: main (echantillon.cpp:99)
     ==28950==  Location 0xbeb8b914 is 0 bytes inside local var "__b"
     ==28950==  declared at ios_base.h:121, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x8049868: main (echantillon.cpp:99)
     ==28950==  Address 0xbeb8b918 is on thread 1's stack
     ==28950== 
     ==28950== Invalid read of size 4
     ==28950==    at 0x804986C: main (echantillon.cpp:99)
     ==28950==  Location 0xbeee7f3c is 0 bytes inside local var "filecov"
     ==28950==  declared at echantillon.cpp:52, in frame #0 of thread 1
     ==28950== 
     ==28950== Invalid write of size 4
     ==28950==    at 0x8049872: main (echantillon.cpp:99)
     ==28950==  Address 0xbeb8b914 is on thread 1's stack

Итак, пожалуйста, может ли кто-нибудь помочь мне разобраться в этом вопросе?

1 Ответ

2 голосов
/ 15 февраля 2012

Из документации Valgrind:

Предупреждение: клиент переключает стеки? (Warning: client switching stacks?)

Valgrind обнаружил настолько большое изменение указателя стека, что предполагает, что клиент переключается на другой стек. В этот момент он делает грубое предположение о том, где находится база нового стека, и соответствующим образом устанавливает права доступа к памяти. Если предположение Valgrind окажется неверным, после этого вы можете получить множество ложных сообщений об ошибках. В настоящее время «большим изменением» считается изменение значения регистра указателя стека более чем на 2000000.

Попробуйте сначала исправить это и посмотрите, исчезнут ли последующие ошибки. В журнале уже сказано, как это сделать: запустить valgrind с параметром --max-stackframe=3528936 или больше.

Если это не решит вашу проблему, пожалуйста, опубликуйте минимальный компилируемый пример, который её воспроизводит: код в вашем вопросе огромен и по большей части не относится к делу.

...