Skip to main content

Feature subset selection Using Genetic Algorithm in MATLAB


function callGeneticAlgo
%CALLGENETICALGO Drive GA-based feature selection and save the result.
%   Initialises the globals shared with the objective function (data
%   matrix, train/test indices, evaluation counter, error history), runs
%   the genetic algorithm through gaFeaSelection, and writes the returned
%   solution vector to 'selected.GA', one value per line.

global mat trainInd testInd counter errList

% Random split of 1420 sample indices with ratios 0.7 / 0 / 0.3; the middle
% (second) output is discarded.
[trainInd,~,testInd] = dividerand(1420,0.7,0,0.3);

% Reset the bookkeeping that the objective function updates on every call.
errList = [];
counter = 1;

% Load the feature matrix (the objective function treats the last column as
% the label).
mat = load('../features/alltopPNPDMF.feature');

numFeatures  = 1588;   % number of features, excluding the label column
popSize      = 100;    % GA population size
timeLimitSec = 10800;  % wall-clock budget in seconds (3 hours)

x = gaFeaSelection(numFeatures,popSize,timeLimitSec);

dlmwrite('selected.GA',x,'delimiter','\n');

display('Done');

end


function [x,fval,exitflag,output,population,score] = gaFeaSelection(nvars,PopulationSize_Data,TimeLimit_Data)
%GAFEASELECTION Run the genetic algorithm over bit-string individuals.
%   nvars               - number of decision variables (one bit per feature)
%   PopulationSize_Data - GA population size
%   TimeLimit_Data      - time limit passed to the GA 'TimeLimit' option
%
%   Returns every output of GA unchanged. The fitness function is
%   feaSelobjFun; no constraints or bounds are supplied.

% Start from the default option set and apply all overrides in one call.
gaOpts = gaoptimset(gaoptimset, ...
    'PopulationType', 'bitString', ...
    'PopulationSize', PopulationSize_Data, ...
    'TimeLimit',      TimeLimit_Data, ...
    'MutationFcn',    {@mutationuniform, []}, ...
    'Display',        'iter', ...
    'PlotFcns',       {@gaplotbestf});

fitnessFcn = @feaSelobjFun;

[x,fval,exitflag,output,population,score] = ...
    ga(fitnessFcn,nvars,[],[],[],[],[],[],[],gaOpts);

end



function [ evalValue ] = feaSelobjFun( x )
%FEASELOBJFUN GA fitness: 1 - AUC of a linear SVM on the selected features.
%   x         - candidate individual; non-zero entries mark selected features
%   evalValue - value to be minimised by the GA (1 - AUC on the test split)
%
%   Reads the globals mat (data, label in the last column), trainInd and
%   testInd. Appends each fitness value to the global errList at position
%   counter and then increments counter.
global mat
global trainInd
global testInd
global counter
global errList

Data = mat(:,1:end-1);
Label = mat(: , end);

selectedMask = (x~=0);

if ~any(selectedMask)
    % No feature selected: there is nothing to train on, so report the worst
    % possible fitness instead of handing an empty matrix to svmtrain.
    evalValue = 1;
else
    % Keep the selected columns, scaled by the corresponding entries of x
    % (a no-op for 0/1 bit strings).
    selectedFeature = Data(:,selectedMask)*diag(x(selectedMask));

    svmStruct = svmtrain(selectedFeature(trainInd,:),Label(trainInd),...
                'kernel_function','linear'  ,...
                'method' , 'SMO' , 'kktviolationlevel',.55);
    predictedOut = svmclassify(svmStruct,selectedFeature(testInd,:));

    % Only the AUC is needed; label value 1 is treated as the positive class.
    [~,~,~,AUC] = perfcurve(Label(testInd),predictedOut,1);
    evalValue = 1-AUC;
end

% Record this evaluation. The original stored the undefined variable
% 'fval' here, which raised an error on the first call.
errList(counter) = evalValue;
counter = counter + 1;

end



Comments

  1. Hi... Is this feature subset selection or feature selection? Can you provide us with a sample output, please?

    ReplyDelete
  2. Please can you put a link to download the source files?

    ReplyDelete

Post a Comment

Popular posts from this blog

MATLAB cross validation

// use built-in function samplesize = size( matrix , 1); c = cvpartition(samplesize,  'kfold' , k); % return the indexes on each fold ///// output in matlab console K-fold cross validation partition              N: 10    NumTestSets: 4      TrainSize: 8  7  7  8       TestSize: 2  3  3  2 ////////////////////// for i=1 : k    trainIdxs = find(training(c,i) ); %training(c,i);  // 1 means in train , 0 means in test    testInxs  = find(test(c,i)       ); % test(c,i);       // 1 means in test , 0 means in train    trainMatrix = matrix (  matrix(trainIdxs ), : );    testMatrix  = matrix (  matrix(testIdxs  ), : ); end //// now calculate performance %%  calculate performance of a partition     selectedKfoldSen=[];selectedKfoldSpe=[];selectedKfoldAcc=[];     indexSen=1;indexSpe=1;indexAcc=1;     if ( kfold == (P+N) )% leave one out         sensitivity = sum(cvtp) /( sum(cvtp) + sum(cvfn) )         specificity = sum(cvtn) /( sum(cvfp) + sum(cvtn) )         acc

R tutorial

Install R in linux ============ In CRAN home page, the latest version is not available. So, in fedora, Open the terminal yum list R  --> To check the latest available version of r yum install R --> install R version yum update R --> update current version to latest one 0 find help ============ ?exact topic name (  i.e.   ?mean ) 0.0 INSTALL 3rd party package  ==================== install.packages('mvtnorm' , dependencies = TRUE , lib='/home/alamt/myRlibrary/')   #  install new package BED file parsing (Always use read.delim it is the best) library(MASS) #library(ggplot2) dirRoot="D:/research/F5shortRNA/TestRIKEN/Rscripts/" dirData="D:/research/F5shortRNA/TestRIKEN/" setwd(dirRoot) getwd() myBed="test.bed" fnmBed=paste(dirData, myBed, sep="") # ccdsHh19.bed   tmp.bed ## Read bed use read.delim - it is the  best mybed=read.delim(fnmBed, header = FALSE, sep = "\t", quote = &q