Skip to main content

MATLAB check unique string in file

function identifyDuplicate(inputFile)
%IDENTIFYDUPLICATE Split the records of a ranked file into unique and duplicate.
%
%   identifyDuplicate reads '1400M_from_287PS_287NS.ranked'.
%   identifyDuplicate(inputFile) reads the given file instead.
%
%   The input is consumed as consecutive 9-line records:
%       line 1   : record separator (e.g. ******)
%       line 2   : consensus line, e.g. "Consensus: AAACC"; the second
%                  whitespace-delimited token is taken as the sequence
%       lines 3-9: threshold, coverage, p-value, r1, r2, r3, r4 (skipped)
%
%   Sequences are compared case-insensitively (they are upper-cased first).
%   The 1-based index of each record whose sequence has not been seen
%   before is written to the file 'unique'; the index of each record whose
%   sequence repeats an earlier one is written to the file 'dup'. Both
%   files contain one index per line.
%
%   Side effects: clears the command window and overwrites 'unique' and
%   'dup' in the current folder.

clc;

if nargin < 1 || isempty(inputFile)
    inputFile = '1400M_from_287PS_287NS.ranked';
end

fid = fopen(inputFile, 'r');
if fid == -1
    error('identifyDuplicate:fileOpenFailed', ...
          'Cannot open input file: %s', inputFile);
end
% Close the file on every exit path, including errors raised below.
closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU>

seenSeq = {};   % flat cell array of the distinct sequences found so far
uniq    = [];   % record indices of first occurrences (column vector)
dup     = [];   % record indices of repeated sequences (column vector)
index   = 1;    % 1-based index of the record being processed

tline = fgetl(fid); % separator line of the first record (******)
while ischar(tline)

    consensusLine = fgetl(fid); % Consensus: AAACC
    if ~ischar(consensusLine)
        % fgetl returns -1 at end of file: the last record is incomplete.
        % Stop here so the complete records are still written out.
        warning('identifyDuplicate:truncatedRecord', ...
                'Record %d is truncated; ignoring it.', index);
        break;
    end

    % Drop the leading label token and keep the sequence, upper-cased so
    % that the comparison below ignores case.
    curSeq = upper(sscanf(consensusLine, '%*s %s', [1, inf]));

    % Skip the rest of the record: threshold, coverage, p-value, r1..r4.
    for skipped = 1:7
        fgetl(fid);
    end

    % strcmp against a cell array compares with every stored sequence at
    % once; on an empty cell array it yields an empty result, so any() is
    % false and the first record is always classified as unique.
    if any(strcmp(curSeq, seenSeq))
        dup = [dup; index]; %#ok<AGROW>
    else
        seenSeq{end + 1} = curSeq; %#ok<AGROW>
        uniq = [uniq; index]; %#ok<AGROW>
    end

    tline = fgetl(fid); % separator line of the next record, or -1 at EOF
    index = index + 1;
end

dlmwrite('unique', uniq, '\t'); % index of unique entry
dlmwrite('dup'   , dup , '\t'); % index of duplicate entry


Comments

Popular posts from this blog

MATLAB cross validation

// use built-in function samplesize = size( matrix , 1); c = cvpartition(samplesize,  'kfold' , k); % return the indexes on each fold ///// output in matlab console K-fold cross validation partition              N: 10    NumTestSets: 4      TrainSize: 8  7  7  8       TestSize: 2  3  3  2 ////////////////////// for i=1 : k    trainIdxs = find(training(c,i) ); %training(c,i);  // 1 means in train , 0 means in test    testInxs  = find(test(c,i)       ); % test(c,i);       // 1 means in test , 0 means in train    trainMatrix = matrix (  matrix(trainIdxs ), : );    testMatrix  = matrix (  matrix(testIdxs  ), : ); end //// now calculate performance %%  calculate performance of a partiti...

MATLAB optimization toolbox usage with genetic algorithm

Useful tutorial http://www.mathworks.com/products/global-optimization/description3.html Best example of implementation with Constraint, objective function http://www.mathworks.com/help/gads/examples/constrained-minimization-using-the-genetic-algorithm.html More about how to use multi-objective http://www.mathworks.com/discovery/multiobjective-optimization.html http://www.mathworks.com/help/gads/examples/performing-a-multiobjective-optimization-using-the-genetic-algorithm.html http://www.mathworks.com/help/gads/examples/multiobjective-genetic-algorithm-options.html Example GAMULTOBJ (can handle Multiple Objective)  GA(can handle 1 objective) Constrained Minimization Problem We want to minimize a simple fitness function of two variables x1 and x2 min f(x) = 100 * (x1^2 - x2) ^2 + (1 - x1)^2; x min f(x) = 100 * (x1^2 + x2) ^2 + (1 + x1)^2; x such that the following two nonlinear constraints and bounds are satisfied x1*x2 + x1 - x2 + 1.5 <...

Feature subset selection Using Genetic Algorithm in MATLAB

function callGeneticAlgo global mat global trainInd global testInd [trainInd,~,testInd] = dividerand(1420,0.7,0,0.3); global counter global errList counter = 1; errList = []; fileName=  '../features/alltopPNPDMF.feature' ; mat = load(fileName); [x,fval,exitflag,output,population,score] = gaFeaSelection(1588,100,10800); % param1 = #feature excludig label % param2 =  population size % param3 = sec to test (3 hour = 10800 sec) dlmwrite('selected.GA',x,'delimiter','\n'); display('Done'); end function [x,fval,exitflag,output,population,score] = gaFeaSelection (nvars,PopulationSize_Data,TimeLimit_Data) % This is an auto generated MATLAB file from Optimization Tool. % Start with the default options options = gaoptimset; % Modify options setting options = gaoptimset(options,'PopulationType', 'bitString'); options = gaoptimset(options,'PopulationSize', PopulationSize_Data); options = gaoptimset(options,'TimeLimit', T...