Thursday, September 29, 2011

matlab matrix to svm format conversion


inputFormat
============
28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,g

svmFormat
=============
g    1:28.796700    2:16.002100    3:2.644900    4:0.391800    5:0.198200    6:27.700400    7:22.011000    8:-8.202700    9:40.092000    10:81.882800

code
=======

% Convert the comma-separated file magic04.data (10 numeric columns followed
% by a one-character class label per row) into magic04.data.svm, where each
% row is written as:  label<TAB>1:value<TAB>2:value<TAB>...<TAB>10:value<TAB>

% ---- read the input ------------------------------------------------------
fid = fopen('magic04.data','r');
if fid == -1
    % fopen returns -1 on failure; stop here instead of failing later inside
    % textscan with an unrelated "invalid file identifier" message.
    error('Cannot open magic04.data for reading.');
end
raw_data = textscan(fid,'%f %f %f %f %f %f %f %f %f %f  %c','delimiter',',');
fclose(fid);

data = [raw_data{1:10}];   % numeric feature matrix, one row per sample
class = raw_data{11};      % char column of labels (note: this name hides the built-in class())

% ---- write the output ----------------------------------------------------
svmTrain = fopen( 'magic04.data.svm' ,'w');
if svmTrain == -1
    error('Cannot open magic04.data.svm for writing.');
end
noRow = size(data,1);
noCol = size(data,2);
for r=1:noRow
    label = class(r);
    fprintf(svmTrain,'%s\t',label );
    % one "index:value" pair per feature column, 1-based index
    for c=1:noCol
            fprintf(svmTrain,'%d:%f\t',c,data(r,c) );
    end
    fprintf(svmTrain,'\n' );
end
fclose(svmTrain);

Tuesday, September 27, 2011

MATLAB file operation


 ============== TEXTSCAN=================
% Read magic04.data: 10 comma-separated numeric columns plus a one-character
% class label per row.
fid = fopen('magic04.data','r');
if fid == -1
    % fopen returns -1 on failure
    error('Cannot open magic04.data for reading.');
end
% The handle must be the one returned by fopen above (fid).
raw_data = textscan(fid,'%f %f %f %f %f %f %f %f %f %f  %c','delimiter',',');
data = [raw_data{1:10}];   % numeric columns concatenated into one matrix
class = raw_data{11};      % char column of labels (note: this name hides the built-in class())
fclose(fid);


============== FGETL , FPRINTF =================
% Copy positive.seq to randomTrain.seq one line at a time: each line is
% echoed to the console as read and written to the output in upper case.
fid = fopen('positive.seq','r');
fidTrain = fopen('randomTrain.seq','w');

while true
    tline = fgetl(fid);
    % fgetl yields a non-char value (-1) once the end of the file is reached
    if ~ischar(tline)
        break;
    end
    disp(tline);
    fprintf(fidTrain ,'%s\n',upper(tline));
end

fclose(fidTrain );
fclose(fid);

Saturday, September 24, 2011

MATLAB model evaluation sensitivity/specificity/tpRate/fpRate



Confusion matrix
================
% Label convention: 1 = positive class, 0 = negative class.
% The 'order' argument puts the positive class in row/column 1; if the
% labels are ordered differently, change 'order' to match.
% originalOut holds the true labels, predictedOut the model's predictions.

[C,order] = confusionmat( originalOut , predictedOut,'order', [1 0])
% Row 1 counts the samples whose true label is 1, row 2 those whose true label is 0.
sensitivity = C(1,1)/sum(C(1,:))
specificity = C(2,2)/sum(C(2,:))

MATLAB decision tree (classregtree / TreeBagger): both classification and regression


% Load the training and test matrices. In each, the last column is used as
% the output (class) and all preceding columns as the input features.
matrixTrain = load('primate.train' );
matrixTest = load('primate.test' );

featureOutTrain = matrixTrain(:,end);
featureInTrain = matrixTrain( :, 1:end-1);

featureOutTest = matrixTest(:,end);
featureInTest = matrixTest( :, 1:end-1);


% Alternative: a single classification tree
% t = classregtree(featureInTrain,featureOutTrain,'method','classification');
% predictedOut =str2double( eval(t,featureInTest))

% Bagged ensemble of classification trees
bnew = TreeBagger(10 ,featureInTrain , featureOutTrain, 'Method','classification') % for 10 tree
% The predicted labels are converted to numbers with str2double
% (presumably predict returns them as strings for classification - confirm).
predictedOut = predict(bnew, featureInTest)
predictedOut = str2double(predictedOut)


% Display every tree of the bagged ensemble bnew, one after another.
% Linear indexing is used so this works whether Trees is stored as a row or
% as a column cell array; after the loop, t holds the last tree.
for k = 1:numel(bnew.Trees)
    t = bnew.Trees{k}
end



Wednesday, September 21, 2011

MATLAB ANN artificial neural network train test


sample1: 1 2 3 4  label: A
sample2: 1 5 7 7  label: B

  Every sample must be put in a column
=============================
featureIn
1 1
2 5
3 7
4 7
featureOut
A
B

function [yPredict] = doBP(trainFeature,trainValue)
% DOBP  Train a feed-forward network with newff/train and save it to net.mat.
%
%   yPredict = doBP(trainFeature, trainValue)
%
%   trainFeature - training inputs, one sample per row
%   trainValue   - training targets, one sample per row
%   yPredict     - network outputs on the training data as returned by
%                  train, transposed back to one sample per row
%
%   Side effects: plots the training performance and writes the trained
%   network to net.mat (variable name: net) in the current folder.

% The toolbox expects one sample per column, so transpose both inputs.
trainFeature = trainFeature';
trainValue = trainValue';

% version 2010a
net=newff(trainFeature,trainValue,[13 1],{'tansig' 'purelin'}); % tansig purelin

% version 2009a
% net=newff(trainFeature,trainValue,[13 1]);
% net.layers{1}.transferFcn = 'tansig';
% net.layers{2}.transferFcn = 'purelin';

net=init(net);

% Select the training function FIRST: assigning net.trainFcn resets
% net.trainParam to that function's defaults, so parameters set before this
% line would be discarded.
net.trainFcn = 'trainlm'; % trainrp trainbfg  trainlm

net.trainParam.epochs = 99999999;
net.trainParam.goal = 0.0000001; %(stop training if the error goal hit)
net.trainParam.lr= 0.000001; % (learning rate, not default trainlm) [0.01]
% net.trainParam.lr_dec = 0.000001;
% net.trainParam.mc = 0.9;
% net.trainParam.min_grad = 1e-10;
net.trainParam.show=1 ; %(no. epochs between showing error) [25]
net.trainParam.time =100000; %    (Max time to train in sec) [inf]

% Data division: 80% training, 20% validation, no held-out test set.
net.divideParam.trainRatio = 80/100;  % Adjust as desired
net.divideParam.valRatio = 20/100;  % Adjust as desired
net.divideParam.testRatio = 0/100;  % Adjust as desired


% TRAIN
[net,tr,Ytrain] = train(net,trainFeature,trainValue);  %train(net,subset_active_input',subset_active_output');
plotperf(tr);

% Save only the network; a bare "save net" would write every variable of
% this function's workspace into net.mat.
save('net.mat','net');

% Return the training outputs in the caller's one-sample-per-row layout.
yPredict = Ytrain';

end

function predictedY = doTesting(testFeatureIn)
% DOTESTING  Run the network stored in net.mat on new samples.
%
%   predictedY = doTesting(testFeatureIn)
%
%   testFeatureIn - test inputs, one sample per row
%   predictedY    - network outputs, one sample per row
%
%   Requires net.mat (containing a variable named net) in the current
%   folder or on the path.

      testFeatureIn = testFeatureIn';  % the toolbox expects one sample per column

      % Load only the variable 'net' into a struct, so nothing else stored
      % in net.mat is injected into this function's workspace.
      stored = load('net.mat','net');

      % Simulate the network and transpose back to one sample per row.
      predictedY = sim(stored.net,testFeatureIn)';
end

Tuesday, September 20, 2011

C/C++ strtok string tokenizer

    
    char delims[] = "\t \n";
    char *result = NULL; // always hold the token serially

    result = strtok( curline, delims ); // get first token
    count = 0;
    while( result != NULL ) {
          count++;
        switch(count){
            case 1:
               strcpy(chromosomename, result);
               break;
            case 2:
               strcpy(sStart,result);
               startIndex=atoi(result);
               break;
            default:
               fprintf(fppromotor,"%s\t",result);
               break;
        }

        result = strtok( NULL, delims ); // get next token

    }

Monday, September 19, 2011

File Operation in C++

    ifstream  fpA;

    fpA.open( "data",    ios::in );




    fpA.close();


Sunday, September 18, 2011

MATLAB load textscan or save a matrix

load
============
% Read the contents of the file engine.train into the variable matrix.
% NOTE(review): presumably a plain-text numeric table with the same number
% of columns in every row - confirm against the actual file.
matrix = load('engine.train');


textscan
==========================

% Read magic04.data: 10 comma-separated numeric columns plus a one-character
% class label per row.
fd = fopen('magic04.data','r');
if fd == -1
    % fopen returns -1 on failure
    error('Cannot open magic04.data for reading.');
end
raw_data = textscan(fd,'%f %f %f %f %f %f %f %f %f %f  %c','delimiter',',');
fclose(fd);                % release the file handle once the data is read
data = [raw_data{1:10}];   % numeric columns concatenated into one matrix
class = raw_data{11};      % char column of labels (note: this name hides the built-in class())

saving
=============

% Write the inputs and outputs side by side (outputs as the last columns)
% to engine.train, using a tab as the column separator.
% NOTE(review): dlmwrite's default numeric precision is used here; pass a
% 'precision' option if more digits must be preserved - confirm the need.
dlmwrite('engine.train', horzcat(trainFeatureIn, trainFeatureOut), 'delimiter', '\t');