Skip to main content

Python tutorial




How to install different packages:


In MAC, use pip

pip install -U numpy scipy scikit-learn

How to see where the package is installed


In MAC: pip show numpy


Import all

============

import className
OR
from package import className


Code Structure
========================

1. Just write the script as plain statements, one per line

# Module-level statements run top to bottom; print is a function in Python 3.
print("ab")
print("cd")

2. Use functions (here you need a `__main__` guard as the entry point):

def myfunc():
    """Print a fixed message to show a plain function call."""
    print("Inside fnc ab")


if __name__ == '__main__':
    # This guard is the script entry point; it does not run on import.
    myfunc()
    # NOTE(review): Utilities is not defined in this snippet; it must be
    # imported from wherever the project declares it.
    objUtil = Utilities.printString("Hello world")

3. Use like java, class + function +main


myclass.py
--------------------

class LncRNA(object):
    """Record holding a chromosome name and a start/end coordinate pair."""

    def __init__(self, chr, chrStart, chrEnd):
        self.chr = chr
        self.chrStart = chrStart
        self.chrEnd = chrEnd

    def classfunction(self):
        """Return ``chr<TAB>chrStart<TAB>chrEnd`` as a single string.

        An instance method must take ``self`` as its first parameter;
        without it, ``obj.classfunction()`` raises TypeError.
        """
        parts = (self.chr, self.chrStart, self.chrEnd)
        return '\t'.join(str(part) for part in parts)

caller.py
--------------
def myfunc():
    """Print a fixed message to show a plain function call."""
    print("Inside fnc ab")


if __name__ == '__main__':
    # The class lives in myclass.py, so it must be imported before use.
    from myclass import LncRNA

    myfunc()
    obj = LncRNA("chr1", 120, 230)
    # classfunction() only builds the string; print it to make it visible.
    print(obj.classfunction())

Like Java: helper class + function + main in one file
-----------------------------------------

class myinner:
    """Helper class declared in the same file as the code that uses it."""

    def __init__(self, larray):
        self.array = larray

    def fncinner(self):
        """Print a fixed message to show an instance-method call."""
        print("hi inner")


def myfunc():
    """Print a fixed message to show a module-level function call."""
    print("inside fnc")


if __name__ == '__main__':
    myfunc()
    # Parentheses create an instance; a bare ``myinner`` is only the class.
    obj = myinner([1, 2, 3])
    obj.fncinner()


Return value: no need to declare a return type in the function definition; just use `return` in the body
==============================

def myret():
    """Return whatever ``val`` is bound to in the enclosing scope."""
    return val


print
============
(Like Java, strings can be concatenated with +)
(Like C's printf, but use % instead of a comma between the format string and its arguments: "%d" % i)
============================

# One format string with a tuple of arguments replaces chained % and +.
print("#lncrna in %s is: %d" % (chrName, len(chrmWiseLNCRNAList)))

==============================================
Array

Completely different from C/Java: an array behaves like a list, but it can store elements of only one type.
===============================================

# First way: a typed array from the standard-library ``array`` module
from array import array

myArr = array('L')  # type code 'L' = unsigned long
myArr.append(54)
myArr.append(54)
i = 0  # index of the element to show
print(myArr[i])

# Second way: a plain list pre-filled with zeros
arrSize = 5
myArr = [0] * arrSize
print(myArr[i])


def arrayProcess():
    """Build a 2x4 table of zeros, fill it with 1..8 row by row, and print it.

    The empty table is printed first, then ``DONE``, then one line per
    row with each cell followed by a tab.
    """
    noRow = 2
    noCol = 4
    # Build every row separately; ``[[0] * noCol] * noRow`` would make all
    # rows the same list object.
    table = [[0] * noCol for _ in range(noRow)]
    print(table)
    print('DONE')

    inc = 1
    for i in range(noRow):
        for j in range(noCol):
            table[i][j] = inc
            inc += 1

    for row in table:
        for cell in row:
            # end='' keeps the cells on one line (the Python 2 trailing comma).
            print(str(cell) + '\t', end='')
        print()

for loop with an increment (step) other than 1, e.g. 2
==============================

# range(start, end, inc) yields start, start+inc, ... and stops before end.
for i in range(start, end, inc):
    # i is an int, so convert it before concatenating with a string.
    print("Processing: " + str(i))

Slice indexing x[i:j] ==> elements from index i to (j-1)
Output in both cases: 0 1 2
===========================
# range(0, 3, 1) stops before 3.
for i in range(0, 3, 1):
    print(i, end=' ')

a = [0, 1, 2, 3]
# The slice a[0:3] likewise excludes index 3.
for i in a[0:3]:
    print(i, end=' ')


Basic string operation
=================

def stringProcess():
    """Demonstrate basic string operations by printing the result of each.

    Every step prints its own result, so the function runs to the end
    instead of returning from the middle of the comparison example.
    """
    s = "Hello World"

    #### split on whitespace ####
    for field in s.split():
        print(field)
    print("Str len is:" + str(len(s)))

    #### capital 1st letter (the remaining letters are lower-cased) ####
    print(s.capitalize())

    #### index find (returns -1 when the substring is absent) ####
    pos = s.find("ello")
    print("position of ello is: " + str(pos))

    #### substring: from the match up to, not including, index 5 ####
    print("substring: " + s[pos:5])

    #### compare (case-insensitive) ####
    chrName = "Chr1"
    print(chrName.lower() == 'chr1')

    #### logical negation ####
    print(not chrName.lower() == 'chr2')

    #### concat ####
    print(s + " how are you?")

    #### replace: returns a new string, the original is unchanged ####
    newString = s.replace("H", "M")
    print(newString)
    print(s)

    #### trim / strip ####
    print(s.strip())

    #### check prefix & postfix (both are case-sensitive) ####
    print(s.startswith("He"))
    print(s.endswith("world"))


L1. Create class
==============


class Motif(object):
    """Example class with a class attribute, a static method and instance state."""

    # Class-level attribute, reachable as Motif.staticVar.
    staticVar = 5

    @staticmethod
    def staticFnc():
        """Print 555; callable on the class without an instance."""
        print(555)

    def __init__(self, id, name, value):
        self.id = id
        self.name = name
        self.value = value

    def __str__(self, *args, **kwargs):
        """Return ``id<TAB>name<TAB>value``."""
        # str() on every field, so a non-string name cannot raise TypeError.
        return '\t'.join(str(part) for part in (self.id, self.name, self.value))

    def update(self, inc):
        """Add *inc* to ``self.value`` in place."""
        self.value = self.value + inc


**** calling *******

from com.cbrc.bean.Motif import Motif

obj = Motif(1, 'tanvir', 10)
obj.update(5)             # value: 10 -> 15
Motif.staticFnc()         # static method: called on the class itself
print(Motif.staticVar)    # class attribute: read through the class

obj.update(99)            # value: 15 -> 114
print(obj.value)

print(obj)                # print() calls obj.__str__()

2. Read from file
==============
from scipy.io.fopen import fopen

def readIt(fnameIn, fnameOut):
    """Copy the text file *fnameIn* to *fnameOut* line by line.

    The output file is opened (and truncated) first, as before. Both
    handles are closed by the ``with`` block even if an error occurs.
    """
    with open(fnameOut, "w") as fidOut, open(fnameIn, "r") as fid:
        for curLine in fid:
            fidOut.write(curLine)




def readFileAsString_SkipHeader(fin):
    """Return the first column of a comma-separated file, skipping its header.

    fin: path of the file to read (a path, as in readFileAsString).

    The first row is discarded as a header and empty rows are ignored.
    The collected values are returned as a list.
    """
    myList = []
    # newline='' lets the csv module handle line endings itself.
    with open(fin, 'rt', newline='') as f:
        myreader = csv.reader(f, delimiter=",")
        next(myreader, None)  # skip the headers
        for row in myreader:
            if row:  # csv yields [] for an empty line
                myList.append(row[0])
    return myList


def readFileAsString(fin):
    """Print each line of the text file *fin*, then each of its fields.

    Fields are split on whitespace. The line is printed with its own
    newline, so a blank line follows it in the output.
    """
    # The ``with`` block closes the handle; a bare open() in the ``for``
    # header leaves closing to the garbage collector.
    with open(fin, 'r') as handle:
        for line in handle:
            print(line)
            for field in line.split():
                print(field)

def readFileAsList(fname):
    """Return the first column of the comma-separated file *fname* as a list.

    Empty lines are skipped. The file is closed even if parsing fails.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(fname, 'rt', newline='') as f:
        return [row[0] for row in csv.reader(f, delimiter=",") if row]


Set Operation
===========

def _firstColumnSet(fname):
    """Return the distinct first-column values of a comma-separated file."""
    with open(fname, 'rt', newline='') as f:
        return {row[0] for row in csv.reader(f, delimiter=",") if row}


def setOperstionFromFile(fname1, fname2, outFile):
    """Compare the first columns of two CSV files and write a summary report.

    fname1, fname2: comma-separated input files; the first column of each
        row is collected into set A and set B respectively.
    outFile: path of the report, written through writeFileFromBuffer.

    The report lists the sizes of A, B, their union, intersection and both
    differences, followed by the common elements one per line. Common
    elements are written in sorted order so the report is reproducible.
    """
    set1 = _firstColumnSet(fname1)
    set2 = _firstColumnSet(fname2)

    myunion = set1 | set2         # or set1.union(set2)
    myIntersection = set1 & set2  # or set1.intersection(set2)
    myDiffAB = set1 - set2
    myDiffBA = set2 - set1

    lines = [
        "Total element in A  " + str(len(set1)),
        "Total element in B  " + str(len(set2)),
        "Total element in union " + str(len(myunion)),
        "Total element in intersection " + str(len(myIntersection)),
        "Total element in A-B  " + str(len(myDiffAB)),
        "Total element in B-A  " + str(len(myDiffBA)),
        " Common Elemenets: ",
    ]
    lines.extend(sorted(myIntersection))
    mybuffer = "".join(line + "\n" for line in lines)

    writeFileFromBuffer(outFile, mybuffer)


Find word in line
================


def findWordInLine( inputStr, myLine):
    """Report whether the pattern *inputStr* occurs anywhere in *myLine*.

    *inputStr* is passed to ``re.search`` as a regular expression, with
    the MULTILINE and IGNORECASE flags. Prints "FOUND" or "NOT FOUND"
    and returns the matching boolean.
    """
    flags = re.M | re.IGNORECASE
    found = re.search(inputStr, myLine, flags) is not None
    print("FOUND" if found else "NOT FOUND")
    return found


3. String split with regular expression
========================

import re

# A raw string keeps the backslash of \s intact for the regex engine.
myPat = re.compile(r"\s+")
myFields = myPat.split(curLine)
for col in myFields:
    print(col)


4. List of tuple
===============

def tupleProcess(fnameIn, fnameOut):
    """Parse whitespace-separated rows into tuples and write them out.

    fnameIn: text file whose rows hold a name followed by two integers.
    fnameOut: output file; receives ``name<TAB>first integer`` per row.

    Each row becomes a ``(chr, chrStart, chrEnd)`` tuple in a list. The
    list length is printed before writing. Blank lines are skipped.
    """
    myPat = re.compile(r"\s+")
    myList = []  # list of (chr, chrStart, chrEnd) tuples

    with open(fnameOut, "w") as fidOut, open(fnameIn, "r") as fid:
        for curLine in fid:
            curLine = curLine.strip()
            if not curLine:
                continue  # a blank line has no fields to parse
            myFields = myPat.split(curLine)
            myBed = (myFields[0], int(myFields[1]), int(myFields[2]))
            myList.append(myBed)

        # write data of LIST
        print(len(myList))
        for curBed in myList:
            fidOut.write(curBed[0] + '\t' + str(curBed[1]) + "\n")

  # write buffer data to File
def writeFileFromBuffer(fnmOut, myBuffer):
    """Write the string *myBuffer* to *fnmOut*, replacing existing content."""
    # The ``with`` block closes the file even if write() raises.
    with open(fnmOut, "w") as target:
        target.write(myBuffer)




def printList(myList):
    """Print every element of *myList* on its own line."""
    # Iterating directly avoids an index loop and a local named ``len``
    # that would hide the builtin.
    for item in myList:
        print(item)



def sort_and_rank_list(myList):
    """Sort *myList* in place and return its values with their dense ranks.

    Equal values share a rank and ranks rise by one per distinct value,
    starting at 1. For example ``[40, 20, 20, 70, 10, 10, 10]`` gives
    ``([10, 10, 10, 20, 20, 40, 70], [1, 1, 1, 2, 2, 3, 4])``.

    Returns a ``(sorted values, ranks)`` pair of new lists. The argument
    itself is also left sorted, as before.
    """
    myList.sort()

    listSortedValue = list(myList)
    listRank = []

    curRank = 0
    for serial, curValue in enumerate(myList):
        # Compare with the previous element, not a numeric sentinel, so
        # very small values still start at rank 1.
        if serial == 0 or curValue > myList[serial - 1]:
            curRank += 1
        listRank.append(curRank)

    return listSortedValue, listRank


def rank_list(myListOrig):
    """Return a dict mapping each distinct value to its dense rank.

    The smallest value gets rank 1 and ranks rise by one per distinct
    value. For example ``[40, 20, 20, 70, 10, 10, 10]`` gives
    ``{10: 1, 20: 2, 40: 3, 70: 4}``. The input is not modified.
    """
    # Ranking the sorted distinct values needs no numeric sentinel, so
    # arbitrarily small values are ranked correctly.
    distinctSorted = sorted(set(myListOrig))
    return {value: rank for rank, value in enumerate(distinctSorted, start=1)}


4 Vector of Objects
=================


class MyClass(object):
    """Minimal object that stores a single number."""

    def __init__(self, number):
        self.number = number


if __name__ == "__main__":
    # A plain list serves as the "vector" of objects.
    my_objects = [MyClass(i) for i in range(5)]

    for obj in my_objects:
        print(obj.number)
==========================
5. Hashtable / Dictionary

# Plain dictionary / hash map.
# (From Python 3.7 on, a plain dict also preserves insertion order.)
myHash = dict()
myHash['mykey'] = val
print(myHash['mykey'])

# Explicitly ordered dictionary / ordered hash map
import collections
dictLinesNR = collections.OrderedDict()



def createDictionary(inFile, indexKey, indexValue, mydelimiter):
    """Build a dict from two columns of a delimited text file.

    inFile: path of the file to read.
    indexKey: column index used as the dictionary key.
    indexValue: column index used as the dictionary value.
    mydelimiter: field delimiter passed to ``csv.reader``.

    Empty lines are skipped. When a key occurs more than once, the first
    value is kept. The number of entries is printed before returning.
    """
    myDict = {}
    # newline='' lets the csv module handle line endings itself.
    with open(inFile, 'rt', newline='') as f:
        for row in csv.reader(f, delimiter=mydelimiter):
            if not row:
                continue
            # setdefault inserts only when the key is new (first one wins).
            myDict.setdefault(row[indexKey], row[indexValue])
    print(" Finally the dictionary has " + str(len(myDict)) + " entry ")
    return myDict




==========================

class LncRNA(object):
    """Record holding a chromosome name and a start/end coordinate pair."""

    def __init__(self, chr, chrStart, chrEnd):
        self.chr = chr
        self.chrStart = chrStart
        self.chrEnd = chrEnd

    def __str__(self, *args, **kwargs):
        """Return ``chr<TAB>chrStart<TAB>chrEnd``."""
        parts = (self.chr, self.chrStart, self.chrEnd)
        return '\t'.join(str(part) for part in parts)

    # Indented to match the other methods so it belongs to the class.
    def __getAnyValue__(self, *args, **kwargs):
        """Return the start coordinate (``chrStart``)."""
        return self.chrStart


def dictionaryProcess(fnameIn, fnameOut):
    """Index the rows of *fnameIn* by first column and write the index out.

    fnameIn: text file whose rows hold a name followed by two integers,
        separated by whitespace.
    fnameOut: output file; receives ``key<TAB>chrStart<TAB>str(record)``
        for every dictionary entry.

    Each row becomes an LncRNA object, which is printed. The first object
    seen for a name is stored; later rows with the same name only print
    "Already exist". Blank lines are skipped.
    """
    myPat = re.compile(r"\s+")
    myDict = dict()
    with open(fnameOut, "w") as fidOut, open(fnameIn, "r") as fid:
        for curLine in fid:
            curLine = curLine.strip()
            if not curLine:
                continue  # a blank line has no fields to parse
            myFields = myPat.split(curLine)
            chrName = myFields[0]
            myRNA = LncRNA(chrName, int(myFields[1]), int(myFields[2]))
            print(myRNA)
            if chrName in myDict:
                print("Already exist: " + chrName)
            else:
                myDict[chrName] = myRNA  # INSERT NEW ELEM

        # write data of DICTIONARY
        print(len(myDict))
        for k, v in myDict.items():
            # __getAnyValue__() returns chrStart, so convert it before
            # concatenating with strings.
            fidOut.write(k + '\t' + str(v.__getAnyValue__()) + '\t' + str(v) + "\n")

============
6. SET
# Create an empty set with set(); the literal {} would create a dict.
mySet = set()
# add() inserts one element; adding a value already present is a no-op.
mySet.add(elem)
============

def setProcess(fnameIn, fnameOut):
    """Collect the distinct first-column values of *fnameIn* into a set.

    fnameIn: text file whose rows hold a name followed by two integers,
        separated by whitespace.
    fnameOut: output file; receives one distinct name per line, in set
        iteration order.

    The set size is printed before writing. Blank lines are skipped. The
    function ends with set sorting, union, intersection and difference
    as a demonstration; those results are not returned.
    """
    myPat = re.compile(r"\s+")
    mySet = set()
    with open(fnameOut, "w") as fidOut, open(fnameIn, "r") as fid:
        for curLine in fid:
            curLine = curLine.strip()
            if not curLine:
                continue  # a blank line has no fields to parse
            myFields = myPat.split(curLine)
            # Parse the coordinates so malformed rows still raise.
            int(myFields[1])
            int(myFields[2])
            mySet.add(myFields[0])  # INSERT NEW ELEM

        # write data of SET
        print(len(mySet))
        for elem in mySet:
            fidOut.write(elem + "\n")

    mySet2 = {'chr2', 'chrX'}
    # Plain string sort; key=int raises ValueError on names like 'chr1'.
    sortedSet = sorted(mySet)
    uni = mySet | mySet2
    intrsct = mySet & mySet2
    diff = mySet - mySet2



2D Array
==============

Creation and initialize with 0:

# The outer list has yDimension rows, each with xDimension zeros, so an
# element is addressed as myMat[y][x].
myMat = [[0 for x in range(xDimension)] for y in range(yDimension)]

Write
def writeMatrix2D(myMat, myseperator, outFile):
    """Write a 2D list to *outFile*, one row per line.

    myMat: list of rows; every cell is converted with ``str``.
    myseperator: string placed between cells, not after the last one.
    outFile: output path, written through writeFileFromBuffer.

    An empty matrix produces an empty file.
    """
    print("writing matrix of count")
    # join() puts the separator only between cells.
    mybuffer = "".join(
        myseperator.join(str(cell) for cell in row) + "\n" for row in myMat
    )
    writeFileFromBuffer(outFile, mybuffer)


Write buffer content to file

def writeFileFromBuffer(fnmOut, myBuffer):
    """Write the string *myBuffer* to *fnmOut*, replacing existing content."""
    # The ``with`` block closes the file even if write() raises.
    with open(fnmOut, "w") as target:
        target.write(myBuffer)



NUMPY OPERATIONS



Matrix operations


Matrix Creation
matrixNP1 = numpy.matrix(numpy.zeros(shape=(totState, totHour), dtype=float), dtype=float)

# Sum of each column (result shape: 1 x totHour)
sumColumn = matrixNP1.sum(axis=0)

# Sum of each row (result shape: totState x 1)
sumRow = matrixNP1.sum(axis=1)


# Divide each column by a vector (here: the column sums).
# axis=0 gives one sum per column; broadcasting divides every column by
# its own sum. NOTE(review): a column that sums to 0 produces nan.
matrixNP1 = matrixNP1 / matrixNP1.sum(axis=0)


# Divide each element by a scalar
matrixNP1 = matrixNP1 / 1




Exception Handling


http://stackoverflow.com/questions/4990718/python-about-catching-any-exception



import sys
try:
    # The ``with`` block closes the file even if readline() fails.
    with open('myfile.txt') as f:
        s = f.readline()
    i = int(s.strip())
except IOError as err:
    # Python 3 has no ``as (errno, strerror)`` unpacking; read the
    # attributes from the exception object instead.
    print("I/O error({0}): {1}".format(err.errno, err.strerror))
except ValueError:
    print("Could not convert data to an integer.")
except:  # handle all remaining exceptions, then re-raise so none is swallowed
    print("Unexpected error:", sys.exc_info()[0])
    raise




Comments

Popular posts from this blog

MATLAB cross validation

// use built-in function samplesize = size( matrix , 1); c = cvpartition(samplesize,  'kfold' , k); % return the indexes on each fold ///// output in matlab console K-fold cross validation partition              N: 10    NumTestSets: 4      TrainSize: 8  7  7  8       TestSize: 2  3  3  2 ////////////////////// for i=1 : k    trainIdxs = find(training(c,i) ); %training(c,i);  // 1 means in train , 0 means in test    testInxs  = find(test(c,i)       ); % test(c,i);       // 1 means in test , 0 means in train    trainMatrix = matrix (  matrix(trainIdxs ), : );    testMatrix  = matrix (  matrix(testIdxs  ), : ); end //// now calculate performance %%  calculate performance of a partition     selectedKfoldSen=[];selectedKfoldSpe=[];selectedKfoldAcc=[];     indexSen=1;indexSpe=1;indexAcc=1;     if ( kfold == (P+N) )% leave one out         sensitivity = sum(cvtp) /( sum(cvtp) + sum(cvfn) )         specificity = sum(cvtn) /( sum(cvfp) + sum(cvtn) )         acc

R tutorial

Install R in linux ============ In CRAN home page, the latest version is not available. So, in fedora, Open the terminal yum list R  --> To check the latest available version of r yum install R --> install R version yum update R --> update current version to latest one 0 find help ============ ?exact topic name (  i.e.   ?mean ) 0.0 INSTALL 3rd party package  ==================== install.packages('mvtnorm' , dependencies = TRUE , lib='/home/alamt/myRlibrary/')   #  install new package BED file parsing (Always use read.delim it is the best) library(MASS) #library(ggplot2) dirRoot="D:/research/F5shortRNA/TestRIKEN/Rscripts/" dirData="D:/research/F5shortRNA/TestRIKEN/" setwd(dirRoot) getwd() myBed="test.bed" fnmBed=paste(dirData, myBed, sep="") # ccdsHh19.bed   tmp.bed ## Read bed use read.delim - it is the  best mybed=read.delim(fnmBed, header = FALSE, sep = "\t", quote = &q