function identifyDuplicate(rankedFile)
%IDENTIFYDUPLICATE Split ranked records into first-seen and repeated sequences.
%   IDENTIFYDUPLICATE() reads '1400M_from_287PS_287NS.ranked' from the
%   current folder. IDENTIFYDUPLICATE(RANKEDFILE) reads RANKEDFILE instead.
%
%   The input is consumed as consecutive 9-line records:
%       line 1     separator line ("******"); its content is ignored
%       line 2     consensus line, e.g. "Consensus: AAACC"; the leading
%                  label token is skipped and the sequence token is kept
%       lines 3-9  seven lines that are skipped (threshold, coverage,
%                  p-value and r1..r4 according to the original notes)
%
%   Sequences are compared case-insensitively (everything is upper-cased).
%   Two tab-delimited files are written to the current folder:
%       'unique'   1-based record numbers whose sequence was not seen before
%       'dup'      1-based record numbers whose sequence repeats an earlier one
%
%   Side effect: the command window is cleared (clc).

    if nargin < 1 || isempty(rankedFile)
        rankedFile = '1400M_from_287PS_287NS.ranked';
    end

    % Number of lines following the consensus line that carry no
    % information needed for duplicate detection.
    SKIPPED_LINES_PER_RECORD = 7;

    clc;

    [fid, openMsg] = fopen(rankedFile, 'r');
    if fid < 0
        error('identifyDuplicate:fileOpen', ...
              'Cannot open ''%s'': %s', rankedFile, openMsg);
    end
    % Close the file even if parsing or writing the results fails.
    closeFile = onCleanup(@() fclose(fid));

    seenSeqs = {};   % flat cell array of the distinct sequences met so far
    uniq = [];       % record numbers of first occurrences (column vector)
    dup = [];        % record numbers of repeated sequences (column vector)
    recordNo = 1;

    headerLine = fgetl(fid);            % separator line of the first record
    while ischar(headerLine)
        consensusLine = fgetl(fid);
        if ~ischar(consensusLine)
            % End of file right after a separator: either a trailing blank
            % line (harmless) or a truncated record (worth reporting).
            if ~isempty(strtrim(headerLine))
                warning('identifyDuplicate:truncatedRecord', ...
                        'Record %d has no consensus line; it was ignored.', ...
                        recordNo);
            end
            break;
        end

        % Drop the label token, keep the sequence, normalise the case.
        curSeq = upper(sscanf(consensusLine, '%*s %s', [1, inf]));

        for k = 1:SKIPPED_LINES_PER_RECORD
            fgetl(fid);
        end

        % strcmp against a cell array compares with every stored sequence.
        if any(strcmp(curSeq, seenSeqs))
            dup(end + 1, 1) = recordNo;
        else
            seenSeqs{end + 1} = curSeq;
            uniq(end + 1, 1) = recordNo;
        end

        headerLine = fgetl(fid);        % separator line of the next record
        recordNo = recordNo + 1;
    end

    % '%d' keeps large record numbers exact; the default precision of
    % dlmwrite would round them to five significant digits.
    dlmwrite('unique', uniq, 'delimiter', '\t', 'precision', '%d');
    dlmwrite('dup', dup, 'delimiter', '\t', 'precision', '%d');
% Comments
% Post a Comment