// fasta454.cpp : Defines the entry point for the console application. // ./fasta_454 mg1655_2_1.txt 50 > mg1655_2_1_out.txt //#include "stdafx.h" #include #include #include #include #include // Program designed by Alex E. Pozhitkov, 2011, University of Washington // pozhit@washington.edu //#define SUPERCOMP #if defined SUPERCOMP #include #endif using namespace std; struct Seq454 { string s_ID; int i_Rank; float f_x; float f_y; string s_Sequence; string s_OppositeStrand; bool bRedundantDirect; bool bRedundantOpposite; bool bHeadsMatch; }; ostream& ReportCoOccurrence(const Seq454& BiggerSeq, const Seq454& SmallerSeq, int idPos, int iOppos) { return cout << '\n' << BiggerSeq.s_ID << '\t' << BiggerSeq.s_Sequence << '\t' << SmallerSeq.s_ID << '\t' < AllSequences; Seq454 s_454individ; ifstream InputFile; int ibpLimit = 0; if(argc != 3) throw 1; sscanf(argv[2], "%d", &ibpLimit); InputFile.open(argv[1]); while(!InputFile.eof()) { getline(InputFile, str); if(str == "") continue; if(str.find('>') != string::npos) { //this is a header of a new sequence s_454individ.s_OppositeStrand = ""; s_454individ.s_OppositeStrand.resize(s_454individ.s_Sequence.length()); string::iterator fops = s_454individ.s_OppositeStrand.begin(); for (string::reverse_iterator ri = s_454individ.s_Sequence.rbegin(); ri < s_454individ.s_Sequence.rend(); ri++) { //making second strand switch (*ri) { case 'A': *fops++ = 'T'; break; case 'T': *fops++ = 'A'; break; case 'G': *fops++ = 'C'; break; case 'C': *fops++ = 'G'; break; default: *fops++ = 'N'; } } s_454individ.bHeadsMatch = false; s_454individ.bRedundantDirect = false; s_454individ.bRedundantOpposite = false; AllSequences.push_back(s_454individ); //save previous seq s_454individ.s_Sequence = ""; int pos1; pos1 = str.find_first_of(' '); s_454individ.s_ID = str.substr(1, pos1 - 1); pos1 = str.find_first_of("rank="); sscanf((str.substr(pos1+5)).c_str(), "%d", &s_454individ.i_Rank); pos1 = str.find_first_of("x="); sscanf((str.substr(pos1+2)).c_str(), "%f", &s_454individ.f_x); pos1 = str.find_first_of("y="); sscanf((str.substr(pos1+2)).c_str(), "%f", &s_454individ.f_y); } else s_454individ.s_Sequence += str; } AllSequences.push_back(s_454individ); //save last record AllSequences.erase(AllSequences.begin()); // remove first empty record cout << "BiggerSeqID\tBiggerSeq\tCooccurredSmallerID\tCooccurredSmallerSeq\tCooccurredSmallerOppositeStrand\tPosDirect\tPosOpposite"; for (int i = 0; i < AllSequences.size(); i++) { cerr << '\r' << (float) i / AllSequences.size()<<"\t\t"; if(AllSequences[i].s_Sequence.length() < ibpLimit) continue; //sequence too short for (int j = i+1; j < AllSequences.size(); j++) { int iPosDirect, iPosOpposite; if (AllSequences[i].s_Sequence.length() > AllSequences[j].s_Sequence.length()) { iPosDirect = AllSequences[i].s_Sequence.find(AllSequences[j].s_Sequence); iPosOpposite = AllSequences[i].s_Sequence.find(AllSequences[j].s_OppositeStrand); if((iPosDirect != string::npos) || ( iPosOpposite != string::npos)) { ReportCoOccurrence(AllSequences[i], AllSequences[j], iPosDirect, iPosOpposite); AllSequences[i].bRedundantDirect |= (iPosDirect != string::npos); AllSequences[i].bRedundantOpposite |= ( iPosOpposite != string::npos); AllSequences[j].bRedundantDirect |= (iPosDirect != string::npos); AllSequences[j].bRedundantOpposite |= ( iPosOpposite != string::npos); AllSequences[i].bHeadsMatch = (iPosDirect == 0) || (iPosOpposite == 0); AllSequences[j].bHeadsMatch = AllSequences[i].bHeadsMatch; //break; //exaustive search if commented } } else { iPosDirect = AllSequences[j].s_Sequence.find(AllSequences[i].s_Sequence); iPosOpposite = AllSequences[j].s_Sequence.find(AllSequences[i].s_OppositeStrand); if((iPosDirect != string::npos) || ( iPosOpposite != string::npos)) { ReportCoOccurrence(AllSequences[j], AllSequences[i], iPosDirect, iPosOpposite); AllSequences[i].bRedundantDirect |= (iPosDirect != string::npos); AllSequences[i].bRedundantOpposite |= ( iPosOpposite != string::npos); AllSequences[j].bRedundantDirect |= (iPosDirect != string::npos); AllSequences[j].bRedundantOpposite |= ( iPosOpposite != string::npos); AllSequences[i].bHeadsMatch = (iPosDirect == 0) || (iPosOpposite == 0); AllSequences[j].bHeadsMatch = AllSequences[i].bHeadsMatch; //break; //exhaustive search if commented } } } } int i_RD, i_RO, i_HM; i_RD = 0; i_RO = 0; i_HM = 0; for (vector::const_iterator S = AllSequences.begin(); S != AllSequences.end(); S++) { i_RD += S->bRedundantDirect; i_RO += S->bRedundantOpposite; i_HM += S->bHeadsMatch; } cout <<"\nRedundant_Direct\tRedundant_Opposite\tHeads_Match\tTotal\n"<< i_RD << '\t' << i_RO <<'\t' << i_HM << '\t'<