Commit ce8d6d09 authored by Lucianna Osucha's avatar Lucianna Osucha

Update util/AminoAcidChain to use standard strings instead of mRNA-esque

indexing method.

Update protein_targeting_and_localization/README.MD to reflect changes
to submodule programming paradigm
parent d7354de4
# protein_targeting_and_localization
#
# Module for finding protein localization/targeting strings in strings of Amino Acids
# !!PRE-PRE-ALPHA!! Lots of TODO. Currently untested
# !!PRE-PRE-ALPHA!! Lots of #TODO. Currently untested
#
# Author: Lucianna Osucha (email:lucianna@vulpinedesigns.com)
**Contents:**
- *protloc.py*: Main end-user program
- *modTargetStrings.py*: Utility interface. Feed files to this to add to the datfile.
- *example_input.txt*: An example of input, ready to feed to modTargetStrings.py
- *protloc.py*: Main end-user program
- *locStrings.txt*: Alphabetized list of known localization strings
*modTargetStrings.py* comes with built-in usage help; run it with `-h` or `--help` to display it.
# AminoAcids.py
#
# Utils for dealing with Amino Acid Chains
# Utils for dealing with Amino Acid Chains along with a class to represent them
#
# Authored by: Lucianna Osucha (lucianna@vulpinedesigns.com)
import mRNA
# Metadata type names (presumably the kinds of entries a chain's metadata
# may hold -- TODO confirm; nothing in this section reads this list)
metadataTypes: list = ['originMRNA', 'locFlags']

# One-letter amino acid codes. A letter's position in this list is its
# "index form"; '*' (index 0) is the stop marker.
aminoAlphabet: list = list('*RHKDESTNQCUGPAVILMFYW')

# All codons from AAA-CCC and the Amino Acid index (in aminoAlphabet) they
# point to. 64 entries, laid out here as four rows of sixteen.
aminoCodons: list = [
    3, 8, 3, 8, 16, 16, 18, 16, 1, 6, 1, 6, 7, 7, 7, 7,
    0, 20, 0, 20, 17, 19, 17, 19, 0, 10, 21, 10, 6, 6, 6, 6,
    5, 4, 5, 4, 15, 15, 15, 15, 12, 12, 12, 12, 14, 14, 14, 14,
    9, 2, 9, 2, 17, 17, 17, 17, 1, 1, 1, 1, 13, 13, 13, 13,
]
# Take a plain text Amino Acid Chain and return a list of the indexed form
def indexAminoChain(seq:str) -> list:
    """Convert one-letter amino acid codes to their aminoAlphabet positions.

    Args:
        seq: Plain-text chain; every character must appear in aminoAlphabet.

    Returns:
        A list with one integer index per character of ``seq``.

    Raises:
        ValueError: If a character is not present in aminoAlphabet.
    """
    return [aminoAlphabet.index(letter) for letter in seq]
# Take an indexed Amino Acid Chain and return a list of the plain text form
def deindexAminoChain(seq:list) -> str:
    """Convert aminoAlphabet positions back to one-letter amino acid codes.

    Args:
        seq: Indexed chain; each element is used as an index into
            aminoAlphabet.

    Returns:
        The plain-text chain, one character per element of ``seq``.

    Raises:
        IndexError: If an element falls outside aminoAlphabet.
    """
    return ''.join(aminoAlphabet[idx] for idx in seq)
# What codon indexes from AAA-CCC will translate into.
# 64 one-letter codes ('*' = stop), written as one row per first base; the
# rows match the standard genetic code with bases ordered A, U, G, C.
aminoCodons = (
    "KNKNIIMIRSRSTTTT"  # A..
    "*Y*YLFLF*CWCSSSS"  # U..
    "EDEDVVVVGGGGAAAA"  # G..
    "QHQHLLLLRRRRPPPP"  # C..
)
# Take a coding sequence and return the Amino Acid Chain it will translate into
def convertRNA(seq:list, stcodon =0) -> list:
    """Translate an indexed coding sequence into one-letter amino acid codes.

    Every third element of ``seq``, starting at position ``stcodon``, is
    looked up in aminoCodons. Translation ends at the first stop marker
    ('*'), which is not included in the result.

    NOTE(review): each element read is used directly as an index into
    aminoCodons, so it is assumed to be a codon index (0-63) as produced by
    the mRNA module's indexing -- TODO confirm against that module.
    NOTE(review): ``stcodon`` is treated as the position in ``seq`` to start
    reading from; the default of 0 reads from the beginning -- TODO confirm
    this matches the intended meaning of "codon index to begin reading from".

    Args:
        seq: Indexed coding sequence.
        stcodon: Position in ``seq`` at which reading begins.

    Returns:
        A list of one-letter amino acid codes, in translation order.

    Raises:
        IndexError: If an element read from ``seq`` is outside aminoCodons.
    """
    result: list = []
    for codon in seq[stcodon::3]:
        amino = aminoCodons[codon]
        # A stop marker terminates the chain and is not part of it.
        if amino == '*':
            break
        result.append(amino)
    return result
class AminoAcidChain:
    """A chain of amino acids stored as a plain string of one-letter codes."""

    # Instantiates a new AminoAcidChain object using plain text
    def __init__(self, seq):
        """Create a chain from plain-text one-letter amino acid codes.

        Args:
            seq: The chain as a ``str``, or a ``list`` of strings (such as
                the list of one-letter codes returned by convertRNA), which
                is joined into a single string.

        Raises:
            TypeError: If ``seq`` is neither a ``str`` nor a ``list`` of
                strings.
        """
        if isinstance(seq, list):
            if not all(isinstance(amino, str) for amino in seq):
                raise TypeError("'seq' list items must be of type 'str'")
            seq = ''.join(seq)
        if not isinstance(seq, str):
            raise TypeError("'seq' must be of type 'str' or 'list'")
        # The chain itself, one character per amino acid.
        self.sequence: str = seq
        # Number of amino acids in the chain.
        self.length: int = len(seq)
        # Metadata attached to this chain; starts out empty.
        self.metadata: list = []

    # Takes
    # an mRNA object, a string of RNA in plain text, or an RNA index list
    # and
    # the codon index to begin reading from
    # and returns a new AminoAcidChain
    @classmethod
    def fromCodingSequence(cls, seq, stcodon =0):
        """Build a chain by translating a coding sequence.

        Args:
            seq: An ``mRNA.mRNA`` object (its ``code`` attribute is used), a
                plain-text RNA string, or an RNA index list.
            stcodon: Passed through to convertRNA as the codon index to
                begin reading from.

        Returns:
            A new instance of ``cls`` holding the translated chain.

        Raises:
            TypeError: If ``seq`` is none of the accepted types.
        """
        if isinstance(seq, mRNA.mRNA):
            seq = seq.code
        if isinstance(seq, str):
            # NOTE(review): indexRNA is not defined in the visible part of
            # this file; it may need to be mRNA.indexRNA -- TODO confirm.
            seq = indexRNA(seq)
        if isinstance(seq, list):
            return cls(convertRNA(seq, stcodon))
        raise TypeError("'seq' must be of type 'mRNA', 'str', or 'list'")

    # Former name of fromCodingSequence, kept so existing callers still work
    @classmethod
    def fromMRNA(cls, seq):
        """Build a chain from mRNA input, reading from the first codon."""
        return cls.fromCodingSequence(seq)

    def __str__(self) -> str:
        """Return the sequence, a blank line, then the metadata list."""
        # metadata is a list, so it is rendered with str() before joining;
        # concatenating it to the sequence directly raises TypeError.
        return self.sequence + "\n\n" + str(self.metadata)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment