Commit 524d48bf authored by Lucianna Osucha's avatar Lucianna Osucha

Minor refactoring, major documentation update.

Change from double to float compression in map_aic
parent b5fbaed8
# Helper utility for loading util directory. Python sucks at this so
# we need one of these in every folder which needs access to anything
# in a higher or adjacent directory.
import os
import sys
mypath = os.path.dirname(os.path.abspath(__file__))
......
......@@ -13,7 +13,7 @@ from struct import *
try:
m_weights = open(mypath + "/codonWeights.dat", "rb")
m_weights = open(mypath + "/codonWeights.dat", "rb") # rb = read binary
except OSError as e:
print("[!!FATAL!!] Error opening \"codonWeights.dat\"\n"
, file=sys.stderr)
......@@ -24,7 +24,8 @@ except OSError as e:
# Look up the stored weight of every codon index in rna.code and append it
# to rna.baseWeights, then return the same (mutated) rna object.
# The datafile holds one little-endian 4-byte float per codon index, so
# entry i starts at byte offset i * 4.
def mapAICs(rna:mRNA.mRNA) -> mRNA.mRNA:
    for i in rna.code:
        # Go to the desired entry in datafile
        m_weights.seek(i * 4)
        # '<f' consumes exactly 4 bytes; reading 8 would make unpack() raise
        rna.baseWeights.append(unpack('<f', m_weights.read(4))[0])
    return rna
......@@ -14,20 +14,23 @@ filename = mypath + '/codonWeights.dat'
# Open the weights datafile for in-place updates. If it does not exist yet,
# create it filled with zeroes and exit so the user can re-run the tool.
try:
    # 'r+b' = read/update an existing binary file. A bare '+b' is rejected
    # by open() with ValueError, which the handler below would not catch.
    m_file = open(filename,'r+b')
except OSError as e:
    print("\"" + filename + "\" not found!\n"
        + "Constructing new file...", file=sys.stderr)
    m_file = open(filename,'x+b') # create and update binary file
    # Pack 64 little endian floats, all set to 0.0, into a bytearray and
    # write to the file
    m_file.write(bytearray(pack('<' + 'f'*64, *([0.0]*64))))
    sys.exit()
parser = argparse.ArgumentParser(description="A helper utility for populating "
+ "the \"" + filename + "\" file"
, epilog="File Formatting Example:\n\nAUG 1000\nCUG 140\nAUC 15.3"
, formatter_class=argparse.RawDescriptionHelpFormatter)
, epilog="For an example of the file format, run the program without any "
+ "arguments (line order can be arbitrary, no upper or lower limit on "
+ "file size). New entries or later entries in the file override older ones"
, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('infile', nargs='*', type=argparse.FileType('r')
, help="Read weights stored in a file and update \""+ filename + "\" with "
......@@ -37,10 +40,10 @@ parser.add_argument('infile', nargs='*', type=argparse.FileType('r')
args = parser.parse_args()
# If empty, print the current weights in the same format they are input
if args.infile == []:
    for i in range(64):
        # Each entry is one little-endian 4-byte float, read sequentially
        # from the current file position; '<f' needs exactly 4 bytes.
        print(mRNA.deindexCodon(i) + " ", *unpack('<f', m_file.read(4)))
    sys.exit()
......@@ -50,5 +53,7 @@ for n_file in args.infile:
codon = n_file.read(4)
if codon == None or codon == "":
break
m_file.seek(mRNA.indexCodon(codon) * 8)
m_file.write(bytearray(pack('<d', float(n_file.readline()))))
# Index RNA and use this value to find desired entry in datafile
m_file.seek(mRNA.indexCodon(codon) * 4)
# Pack weight into a little-endian float and write
m_file.write(bytearray(pack('<f', float(n_file.readline()))))
......@@ -16,39 +16,35 @@ filename = os.path.abspath(mypath + 'locStrings.dat')
try:
m_file = open(filename,'+b')
m_file = open(filename,'+')
except OSError as e:
print("\"" + filename + "\" not found!\n"
+ "Constructing new file...", file=sys.stderr)
m_file = open(filename,'wb')
m_file = open(filename,'w')
newfile = 1
parser = argparse.ArgumentParser(description="A helper utility for populating "
+ "the \"" + filename + "\" file"
, epilog="File Formatting Example:\n\nAUG 1000\nCUG 140\nAUC 15.3" #TODO
, formatter_class=argparse.RawDescriptionHelpFormatter)
+ "the \"" + filename + "\" file", fromfile_prefix_chars='@'
, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('infile', nargs='*', type=argparse.FileType('r') #TODO
, help="Read strings stored in a file and update \"" + filename + "\" with "
+ "the new data. Leave empty to print out currently stored data")
parser.add_argument('sequence', nargs='*', help="Add \"sequence\" to the datafile"
, dest='toadd')
parser.add_argument('-r', '--remove', action='append', help="Instead of adding"
+ ", remove from the datafile", dest='toremove')
args = parser.parse_args()
parser.add_argument('-p', '--print-args', dest="print", action='store_true')
args = parser.parse_args()
if args.infile == []:
if newfile:
sys.exit()
while m_file.readable():
print(deindexAminoChain(*unpack('<c', m_file.read(8))) + '\n')
sys.exit()
filedata = m_file.readlines()
for seq in toadd:
tmp = indexAminoChain(seq)
m_file.seek(0, SEEK_END)
for n_file in args.infile:
while n_file.readable():
seq = indexAminoChain(n_file.readline())
for i in seq:
m_file.write(bytearray(pack('<c', i)))
if args.print:
m_file.read()
sys.exit()
......@@ -12,12 +12,14 @@ import mRNA
metadataTypes:list = ['originMRNA', 'locFlags']
aminoAlphabet:list = ['*', 'R', 'H', 'K', 'D', 'E', 'S', 'T', 'N', 'Q', 'C', 'U', 'G'
, 'P', 'A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W']
# All codons from AAA-CCC and the Amino Acid index (in aminoAlphabet) they point to
aminoCodons:list = [3, 8, 3, 8, 16, 16, 18, 16, 1, 6, 1, 6, 7, 7, 7, 7, 0, 20
, 0, 20, 17, 19, 17, 19, 0, 10, 21, 10, 6, 6, 6, 6, 5, 4, 5, 4, 15, 15, 15
, 15, 12, 12, 12, 12, 14, 14, 14, 14, 9, 2, 9, 2, 17, 17, 17, 17, 1, 1, 1, 1
, 13, 13, 13, 13]
# Take a plain text Amino Acid Chain and return a list of the indexed form
def indexAminoChain(seq:str) -> list:
result:list = []
for i in seq:
......@@ -25,7 +27,7 @@ def indexAminoChain(seq:str) -> list:
return result
# Take an indexed Amino Acid Chain and return a list of the plain text form
def deindexAminoChain(seq:list) -> str:
result = '';
for i in seq:
......@@ -33,7 +35,8 @@ def deindexAminoChain(seq:list) -> str:
return result;
# Take an indexed RNA sequence (a list of base indices) and return the
# Amino Acid Chain (in index form) it will translate into
def convertRNA(seq:list) -> list:
result:list = []
for i in range(0, len(seq), 3):
......@@ -44,6 +47,8 @@ def convertRNA(seq:list) -> list:
class AminoAcidChain:
# Instantiates a new AminoAcidChain object using either plain text or
# preindexed input
def __init__(self, seq):
self.metadata:list = []
......@@ -56,6 +61,8 @@ class AminoAcidChain:
self.length:int = len(seq)
# Converts an mRNA object, a string of RNA in plain text, or an RNA
# index list, to a new aminoAcidChain
@classmethod
def fromMRNA(cls, seq):
if isinstance(seq, mRNA.mRNA):
......@@ -63,7 +70,6 @@ class AminoAcidChain:
if isinstance(seq, str):
seq = indexRNA(seq)
if isinstance(seq, list):
print(seq)
return cls(convertRNA(seq))
else:
raise TypeError("'seq' must be of type 'mRNA', 'str', or 'list'")
......
......@@ -8,5 +8,6 @@
**Contents:**
-*mRNA.py* Utils for dealing with strands of mRNA
-*mRNA.py* Utils for dealing with strands of mRNA
-*AminoChain.py* Utils for dealing with chains of Amino Acids
-*misc-util.py* What it says on the tin.
......@@ -49,28 +49,19 @@ def indexRNA(code:str) -> list:
class mRNA:
def __init__(self, cd, og:str, pal:int):
def __init__(self, cd):
if isinstance(cd, str):
cd = indexRNA(cd)
if not isinstance(cd, list):
raise TypeError("'cd' must be of type 'str' or 'list'")
self.code:list = cd
self.originGene:str = og
self.polyALength:int = pal
self.baseWeights:list[float] = []
self.kozakStrengths:list[float] = []
self.leakyScanStrengths:list[float] = []
self.baseWeights:list = []
self.localisationSequences:list[tuple] = []
self.hairpins:list[tuple] = []
self.IREs:list[tuple] = []
self.uORFs:list[tuple] = []
self.modifiers:list = []
self.adjustedWeights:list[float] = []
self.processedBy:list[str] = []
self.adjustedWeights:list = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment