Skip to main content

MATLAB som

% 1 4 5 7 -- sample 1
% 2 5 6 6 -- sample 2

% cndx gives, for each training sample, the index of the SOM cluster (winning unit) it falls into
% SOM expects each sample as a column, so the feature matrix must be transposed before being passed to the SOM

%  -------  train -----------
% Train a 2x2 self-organizing map and assign each training sample to its
% nearest cluster (winning neuron).
% NOTE(review): L5-L6 say columns are samples (2 samples, 4 features), but
% trainFeature' passed to newsom makes MATLAB treat ROWS of trainFeature as
% samples (4 samples, 2 features) — confirm which layout is intended.
trainFeature = [ 1 2  ; 4 5    ; 5 6 ; 7 6 ]; % Two training data with 4 feature each . So each column is a sample
net = newsom(  trainFeature'  ,[2 2] ); % 2x2 map -> 4 clusters
net.trainParam.epochs = 100;
[net,tr,Y,E,Pf,Af] = train(net, trainFeature'  );
% Euclidean distance from every sample to every neuron's weight vector;
% net.IW{1} holds one weight row per neuron.
distances = dist( trainFeature  ,net.IW{1}' );
[d,cndx_train ] = min(distances,[],2); % cndx gives the cluster index, d gives the distance
cndx_train % the position of each training sample in cluster no


% Count, per cluster, how many positive and negative training samples landed
% in it. Samples 1..totPosTrain are positive, the rest negative.
bookKeep = zeros(SOMd1 * SOMd2, 2); % col 1: +ve count, col 2: -ve count
for sampleIdx = 1:totTrain
    clusterIdx = cndx_train(sampleIdx);
    % first totPosTrain samples are the positive class
    if sampleIdx <= totPosTrain
        labelCol = 1;
    else
        labelCol = 2;
    end
    bookKeep(clusterIdx, labelCol) = bookKeep(clusterIdx, labelCol) + 1;
end
dlmwrite('clusterInfo', bookKeep, '\t');

% ---------  test  -------------

% Run the trained SOM on the test set; sim returns a one-hot matrix of size
% clusterNo x testSample (a single 1 per column marks the winning cluster).
out = sim(net, featureInTest' );
% format: clusterNo*testSample : 0 0 0 1 0 0 0 0 % that means 4th sample in this custer;
% outFormat (clusterNo, serial )

     out = out'; % #testSample * clusterNo
    dlmwrite('out',out,'\t');
    % Preallocate as row vectors: avoids repeated growth inside the loop and,
    % more importantly, clears any stale entries left in the workspace by a
    % previous (larger) run, which would otherwise corrupt the output file.
    % Row shape is kept so the transposes below still yield column vectors.
    clusterNo = zeros(1, totTest);
    origLabel = zeros(1, totTest);
    for c=1:totTest

        clusterNo(c) = find( out(c ,:) ); % index of the winning cluster
        % samples 1..totPosTest are the positive class, the rest negative
        if c<=totPosTest
            origLabel(c) = 1;
        else
            origLabel(c) = -1;
        end

    end
    dlmwrite('predicted',[origLabel' clusterNo'],'\t');

Comments

Popular posts from this blog

MATLAB cross validation

// use built-in function samplesize = size( matrix , 1); c = cvpartition(samplesize,  'kfold' , k); % return the indexes on each fold ///// output in matlab console K-fold cross validation partition              N: 10    NumTestSets: 4      TrainSize: 8  7  7  8       TestSize: 2  3  3  2 ////////////////////// for i=1 : k    trainIdxs = find(training(c,i) ); %training(c,i);  // 1 means in train , 0 means in test    testInxs  = find(test(c,i)       ); % test(c,i);       // 1 means in test , 0 means in train    trainMatrix = matrix (  matrix(trainIdxs ), : );    testMatrix  = matrix (  matrix(testIdxs  ), : ); end //// now calculate performance %%  calculate performance of a partition     selectedKfoldSen=[];selectedKfoldSpe=[];selectedKfoldAcc=[];     indexSen=1;indexSpe=1;indexAcc=1;     if ( kfold == (P+N) )% leave one out         sensitivity = sum(cvtp) /( sum(cvtp) + sum(cvfn) )         specificity = sum(cvtn) /( sum(cvfp) + sum(cvtn) )         acc

R tutorial

Install R in linux ============ In CRAN home page, the latest version is not available. So, in fedora, Open the terminal yum list R  --> To check the latest available version of r yum install R --> install R version yum update R --> update current version to latest one 0 find help ============ ?exact topic name (  i.e.   ?mean ) 0.0 INSTALL 3rd party package  ==================== install.packages('mvtnorm' , dependencies = TRUE , lib='/home/alamt/myRlibrary/')   #  install new package BED file parsing (Always use read.delim it is the best) library(MASS) #library(ggplot2) dirRoot="D:/research/F5shortRNA/TestRIKEN/Rscripts/" dirData="D:/research/F5shortRNA/TestRIKEN/" setwd(dirRoot) getwd() myBed="test.bed" fnmBed=paste(dirData, myBed, sep="") # ccdsHh19.bed   tmp.bed ## Read bed use read.delim - it is the  best mybed=read.delim(fnmBed, header = FALSE, sep = "\t", quote = &q