Skip to main content

MATLAB string manipulation



string cell array
================
% 2-by-1 cell array of character vectors; each cell may hold a different length
ar = {'aa';'bbbbb'}



mat2str   (convert number to string)
======================
% Append each loop value, converted to text by mat2str, to a fixed option prefix.
parameter = '-s 0 -t 0 -c ';
for c = [1 2]
    % no trailing semicolon: each assembled string is echoed to the console
    nextp = horzcat(parameter, mat2str(c))
end

string comparison
================
 sa='ab';
 sb='aba';
 t = strcmp(sa , sb )
if( t == 1)
  display('Yest match');
else
   display('No match')
end


strmatch  find exact string in cell array
=======================
list = {'max', 'minimax', 'maximum', 'max'}
% With 'exact', only entries identical to 'max' match, so x = [1; 4].
% NOTE(review): MathWorks lists strmatch as not recommended;
% find(strcmp('max', list)) is the usual replacement for the exact case.
x = strmatch('max',list,'exact')

strmatch: find strings that start with a given prefix in a cell array
=======================
list = {'max', 'minimax', 'maximum', 'max'}
% Without 'exact', every entry that begins with 'max' matches, so x = [1; 3; 4].
x = strmatch('max',list)

String comparison: strmatch (prefix match) vs strcmp (exact match)
================
 strmatch('ab' , 'abc') ; % returns 1: 'abc' begins with 'ab'
strcmp('ab' , 'abc') ; % returns 0: the two strings are not identical

strfind: find the starting indices of a character or substring pattern
====================
S = 'Find the starting indices of the pattern string';
strfind(S, 'in')
ans =
     2    15    19    45
 
Sub String
===================
 
% keep only the first n characters of s
s = s(1 : n) 

String trim
==========
% strtrim returns ins with leading and trailing whitespace removed
out = strtrim(ins)

Reading from file  % parse each line
===========================
function createLogo
% createLogo  Parse motif records from a profile-model text file.
%
%   Reads the hard-coded file fNamePPM line by line. A line whose first
%   character is '*' starts a motif record. The line after it is the motif
%   ID line: it is split on whitespace, every token is echoed with disp,
%   and the second token is taken as the motif length. The next two lines
%   are the score line (read and discarded) and the 'A' row, whose numbers
%   are stored in row 1 of a 4-by-motifLen matrix.
%
%   The function takes no inputs and returns no outputs.
%
%   Errors:
%     createLogo:fileOpen  - the model file cannot be opened.
%     createLogo:badHeader - a motif ID line has no valid length as its
%                            second token.

motifCount = 0;
fNamePPM = '../gene.features.v2/v2_C_287_NC_48_profile.model'; % columns as Letter

[fid, errMsg] = fopen(fNamePPM);
if fid == -1
    error('createLogo:fileOpen', 'Cannot open %s: %s', fNamePPM, errMsg);
end
% Close the file when this function exits, including on a parse error.
closeFile = onCleanup(@() fclose(fid));

tline = fgets(fid);
while ischar(tline)
    if tline(1) == '*' % start of a motif
        motifCount = motifCount + 1;

        tline = fgets(fid); % read ID
        if ~ischar(tline), break; end % file ended right after the '*' line

        % Tokenize the ID line.
        motifLen = []; % reset so a short header cannot reuse the previous motif's length
        remain = tline;
        countToken = 0;
        while true
            [word, remain] = strtok(remain);
            if isempty(word)
                break;
            end
            countToken = countToken + 1;
            if countToken == 2
                % str2double parses a scalar without evaluating the text as code
                motifLen = str2double(word);
            end
            disp(word);
        end

        if isempty(motifLen) || ~isfinite(motifLen) || motifLen < 0 || motifLen ~= fix(motifLen)
            error('createLogo:badHeader', ...
                  'Motif %d: ID line has no valid motif length as its second token.', ...
                  motifCount);
        end
        profileMatrix = zeros(4, motifLen);

        tline = fgets(fid); % score
        if ~ischar(tline), break; end % truncated record
        tline = fgets(fid); % A
        if ~ischar(tline), break; end % truncated record

        % NOTE(review): str2num evaluates the line with eval; kept because the
        % exact number formatting of the model file is not visible here.
        % Consider sscanf(tline, '%f').' if the file may be untrusted.
        profileMatrix(1, 1:motifLen) = str2num(tline);
    end
    tline = fgets(fid); % to read for next iteration
end
end

String tokenizer ( strtok )
==============
% Walk through wholeString one whitespace-delimited token at a time,
% printing each token; stop as soon as strtok hands back an empty token.
remain = wholeString;
[word, remain] = strtok(remain);
while ~isempty(word)
    disp(word);
    [word, remain] = strtok(remain);
end

% Convert a tab-delimited line into a vector of numbers
=====================================
% Parse the numbers in myLine into a numeric array.
% NOTE(review): str2num evaluates its input with eval; avoid it on untrusted text.
myvector = str2num( myLine );

Read a file with a fixed number of columns (like a matrix) whose contents may include strings

fid = fopen('scan1.dat');
% Format: one integer column (%d) followed by four floating-point columns (%f), tab-separated.
C = textscan(fid, '%d\t%f\t%f\t%f\t%f'); % each row has 5 columns; C holds one cell per column
C{1} (1) ; % first column, 1st row
C{1} (2) ; % first column, 2nd row
fclose(fid);

% NOTE(review): MathWorks lists textread as not recommended; textscan is the suggested replacement.
[ covTarget covBG score source consensus ] = textread(fnameStat,'%f\t%f\t%f\t%s\t%s');
% The file has 5 columns: the first 3 are numeric and the last 2 are strings.


Write a cell array to a file (dlmwrite does not work for cell arrays)


fid = fopen('LLGencodeCommonID.txt', 'w'); % 'w': open for writing, discarding existing contents
% NOTE(review): assumes commonID{2} is a character vector; if it is itself a
% cell array of strings, fprintf would need commonID{2}{:} instead. Confirm.
fprintf(fid, '%s\n', commonID{2}); % write the 2nd cell of commonID (the 2nd column), then a newline
fclose(fid);



sscanf:  Parsing word inside of a string
=====================
     % Read one line without its trailing newline, e.g. 'Consessus: AAACC'.
     consensusSeq = fgetl(fid)
     % '%*s' skips the first whitespace-delimited word; '%s' reads the next one.
     curSeq = sscanf(consensusSeq,'%*s %s', [1, inf]) % curSeq = AAACC


Comments

Popular posts from this blog

MATLAB cross validation

// use built-in function samplesize = size( matrix , 1); c = cvpartition(samplesize,  'kfold' , k); % return the indexes on each fold ///// output in matlab console K-fold cross validation partition              N: 10    NumTestSets: 4      TrainSize: 8  7  7  8       TestSize: 2  3  3  2 ////////////////////// for i=1 : k    trainIdxs = find(training(c,i) ); %training(c,i);  // 1 means in train , 0 means in test    testInxs  = find(test(c,i)       ); % test(c,i);       // 1 means in test , 0 means in train    trainMatrix = matrix (  matrix(trainIdxs ), : );    testMatrix  = matrix (  matrix(testIdxs  ), : ); end //// now calculate performance %%  calculate performance of a partition     selectedKfoldSen=[];selectedKfoldSpe=[];selectedKfoldAcc=[];     indexSen=1;indexSpe=1;indexAcc=1;     if ( kfold == (P+N) )% leave one out         sensitivity = sum(cvtp) /( sum(cvtp) + sum(cvfn) )         specificity = sum(cvtn) /( sum(cvfp) + sum(cvtn) )         acc

R tutorial

Install R in linux ============ In CRAN home page, the latest version is not available. So, in fedora, Open the terminal yum list R  --> To check the latest available version of r yum install R --> install R version yum update R --> update current version to latest one 0 find help ============ ?exact topic name (  i.e.   ?mean ) 0.0 INSTALL 3rd party package  ==================== install.packages('mvtnorm' , dependencies = TRUE , lib='/home/alamt/myRlibrary/')   #  install new package BED file parsing (Always use read.delim it is the best) library(MASS) #library(ggplot2) dirRoot="D:/research/F5shortRNA/TestRIKEN/Rscripts/" dirData="D:/research/F5shortRNA/TestRIKEN/" setwd(dirRoot) getwd() myBed="test.bed" fnmBed=paste(dirData, myBed, sep="") # ccdsHh19.bed   tmp.bed ## Read bed use read.delim - it is the  best mybed=read.delim(fnmBed, header = FALSE, sep = "\t", quote = &q

MATLAB confusion matrix

% Derive classification metrics from a 2x2 confusion matrix.
% test_class and predicted_class must have the same dimension.
% 'order' describes the order of the labels: here 'g' is the positive
% label and 'h' is the negative label.
[C,order] = confusionmat( test_class(1: noSampleTest), predicted_class, 'order', ['g' ;'h'] )

% Unpack the matrix: rows are the true labels, columns the predicted labels.
tp = C(1,1);
fn = C(1,2);
fp = C(2,1);
tn = C(2,2);

% No trailing semicolons below: each metric is echoed to the console.
sensitivity = tp /( tp + fn )
specificity = tn /( fp + tn )
accuracy = (tp+tn) / (tp+fn+fp+tn)
tpr = sensitivity
fpr = 1-specificity
precision = tp /( tp + fp )
fVal = (2*tpr*precision)/(tpr+precision)