Q1.1
Write a function repeats(seq,n). This function receives a sequence and
will print all its subsequences of length n which are present at least twice
in the sequence. So repeats("acgtaaaaacgta",4) would print
acgt
cgta
aaaa
Solution:
def repeats(seq, n):
    """Return every length-n subsequence of seq that occurs at least twice.

    Occurrences may overlap (e.g. "aaaa" occurs twice in "aaaaa").

    Args:
        seq: the string to scan.
        n: length of the subsequences to look for.

    Returns:
        A string holding each repeated subsequence exactly once, in order of
        first occurrence, each followed by a newline. The empty string is
        returned when nothing repeats, when n is not positive, or when n is
        larger than len(seq).
    """
    if n <= 0 or n > len(seq):
        return ""

    occurrences = {}      # subsequence -> number of times it was seen
    first_seen = []       # subsequences in order of first appearance
    for start in range(len(seq) - n + 1):
        window = seq[start:start + n]
        if window not in occurrences:
            occurrences[window] = 0
            first_seen.append(window)
        occurrences[window] += 1

    # Emit each repeated subsequence once, however many times it occurs.
    return "".join(
        window + "\n" for window in first_seen if occurrences[window] >= 2
    )
# Demo call with the example input from the question; the single-argument
# print(...) form runs on both Python 2 and Python 3.
print(repeats("acgtaaaaacgta", 4))
Q1.1
3rd Exam Question:
Write a function overlap(s1,s2) which returns the length of the overlap of the two strings s1 and s2. Here an overlap means that a suffix of s1 is a prefix of s2, or a suffix of s2 is a prefix of s1. For example, for the strings ACGGCTGCA and TTACACGGCTG the function should return 7, since the subsequence ACGGCTG is the prefix of the former and the suffix of the latter sequence. But it should return 0 for the strings ACGGCTACA and TTACACGGCTG. In the function, it should not matter whether s1 is the prefix or the suffix; it should test both options.
Test it with
#should be 7
print overlap("TTACACGGCTG","ACGGCTGCA")
#should be 7
print overlap("ACGGCTGCA","TTACACGGCTG")
#should be 0
print overlap("ACGGCTACA","TTACACGGCTG")
Solution:
def _longest_suffix_prefix(left, right):
    """Return the longest suffix of left that is also a prefix of right."""
    # Try the longest candidate first so the first hit is the answer.
    for size in range(min(len(left), len(right)), 0, -1):
        if left[-size:] == right[:size]:
            return right[:size]
    return ""


def overlap(S1, S2):
    """Find the longest overlap between the ends of S1 and S2.

    An overlap is a suffix of one string that equals a prefix of the other.
    Both directions are tested (suffix of S2 against prefix of S1, and suffix
    of S1 against prefix of S2) and the longer overlap wins; on a tie the
    suffix-of-S2 / prefix-of-S1 direction is preferred.

    Args:
        S1: first string.
        S2: second string.

    Returns:
        The string "<overlap> <length>" (for example "ACGGCTG 7") when the
        strings overlap, otherwise the string "No Overlap 0". The result is
        a formatted string rather than a bare integer so existing callers
        that print it keep seeing the same output.
    """
    s2_into_s1 = _longest_suffix_prefix(S2, S1)
    s1_into_s2 = _longest_suffix_prefix(S1, S2)

    best = s2_into_s1 if len(s2_into_s1) >= len(s1_into_s2) else s1_into_s2
    if not best:
        return "No Overlap 0"
    return best + " " + str(len(best))
# Demo calls for overlap(); the single-argument print(...) form runs on both
# Python 2 and Python 3.
print("Overlap Test")
print("")
print(overlap("TTACACGGCTG", "ACGGCTGCA"))
print(overlap("ACGGCTGCA", "TTACACGGCTG"))
print(overlap("ACGGCTACA", "TTACACGGCTG"))
#3