In [1]:
# Biopython is modeled on BioPerl.
# It provides a collection of bioinformatics utilities.
import Bio

In [2]:
from Bio.Seq import Seq

# Build a Seq object from a raw string of DNA letters.
dna_letters = 'CATGTAGACTAG'
my_seq = Seq(dna_letters)

In [3]:
# Print the sequence; the output shows only its letters.
print(my_seq)

CATGTAGACTAG


In [5]:
# Report the sequence together with its length in bases.
length_report_fields = (my_seq, len(my_seq))
print('sequence %s is %i bases long' % length_report_fields)

sequence CATGTAGACTAG is 12 bases long


In [6]:
# Compute the reverse complement first, then report it.
reverse_complement_seq = my_seq.reverse_complement()
print('reverse complement is %s' % reverse_complement_seq)

reverse complement is CTAGTCTACATG


In [7]:
# Translate the nucleotide sequence, then report the resulting protein.
protein_seq = my_seq.translate()
print('protein translation is %s' % protein_seq)

protein translation is HVD*


In [8]:
# A bare expression on the last line of a cell displays the object's repr.
my_seq

Seq('CATGTAGACTAG')

In [9]:
from Bio import SeqIO

# Walk through every record in the orchid FASTA file and print its ID,
# the repr of its sequence, and its length.
# The `with` block guarantees the file is closed even if parsing or printing
# raises part-way through.
# NOTE(review): other cells in this notebook read from "../Data/" (capital D);
# on a case-sensitive filesystem only one spelling can be right — confirm.
with open("../data/ls_orchid.fasta") as handle:
    for seq_record in SeqIO.parse(handle, "fasta"):
        print(seq_record.id)
        print(repr(seq_record.seq))
        print(len(seq_record))

gi|2765658|emb|Z78533.1|CIZ78533
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGG...GGG', SingleLetterAlphabet())
140


In [None]:
from Bio import Entrez
from Bio import SeqIO

# Contact address sent along with Entrez requests.
# NOTE(review): this is a hard-coded personal address; consider replacing it
# with your own (or reading it from configuration) before sharing the notebook.
Entrez.email = "giri.n.fiu@gmail.com"

# Download one nucleotide record in GenBank text format and print it verbatim.
# Alternative queries, kept for reference:
# handle = Entrez.efetch(db="protein", rettype="gb", id="6273291", retmode="text")
# handle = Entrez.efetch(db="protein", rettype="fasta", id="6273291")
# To parse the record instead of printing the raw text:
# seq_record = SeqIO.read(handle, "gb")
handle = Entrez.efetch(db="nucleotide", rettype="gb", id="EU490707", retmode="text")
try:
    print(handle.read())
finally:
    # Always release the network handle, even if reading or printing fails.
    handle.close()

In [None]:
# Request several records in one call by passing a comma-separated id string.
# NOTE(review): the same accession appears twice in `id`; presumably two
# different accessions were intended — confirm.
handle = Entrez.efetch(db="nucleotide", rettype="gb", id="EU490707,EU490707", retmode="text")
try:
    print(handle.read())
finally:
    # Release the network handle; the original cell never closed it.
    handle.close()

In [None]:
# Convert a GenBank file to FASTA: parse the records from one format and
# write them back out in the other.
from Bio import SeqIO

records = SeqIO.parse("../Data/ls_orchid.gbk", "genbank")
count = SeqIO.write(records, "my_example.fasta", "fasta")

# SeqIO.write's return value is used as the number of records converted.
conversion_summary = "Converted %i records" % count
print(conversion_summary)

In [None]:
# Print the ID of each record in the converted FASTA file, reading by filename.
fasta_records = SeqIO.parse("my_example.fasta", "fasta")
for record in fasta_records:
    print(record.id)

In [None]:
from Bio import SeqIO

# Same listing as before, but parsing from an explicitly opened file handle;
# the `with` block closes the file once the loop finishes.
with open("my_example.fasta", "r") as handle:
    fasta_records = SeqIO.parse(handle, "fasta")
    for record in fasta_records:
        print(record.id)

In [None]:
from Bio import SeqIO

# Materialise the parsed records into a list so they can be indexed by position.
records = list(SeqIO.parse("my_example.fasta", "fasta"))

# Positions shown, in order: first record, last record, 8th record.
for position in (0, -1, 7):
    print(records[position].id)

In [None]:
from Bio import SeqIO

# Build a dictionary of the parsed records, then look one up by its record ID.
fasta_records = SeqIO.parse("my_example.fasta", "fasta")
record_dict = SeqIO.to_dict(fasta_records)

wanted_id = "Z78525.1"
print(record_dict[wanted_id])

In [None]:
from Bio import SeqIO

# Read a Clustal alignment file through SeqIO and print each record's ID.
with open("../Data/opuntia.aln", "r") as handle:
    aligned_records = SeqIO.parse(handle, "clustal")
    for record in aligned_records:
        print(record.id)

In [None]:
# Tutorial: http://biopython.org/DIST/docs/tutorial/Tutorial.html
# Documentation: https://biopython.org/wiki/Category%3AWiki_Documentation
# Seq object: https://biopython.org/wiki/Seq
# SeqIO object: https://biopython.org/wiki/SeqIO
# AlignIO: https://biopython.org/wiki/AlignIO

In [None]:
from Bio import AlignIO

# Load the alignment from a Stockholm file. The `with` block closes the file
# as soon as the alignment has been read; the original passed an anonymous
# open() handle that was never closed.
with open("../Data/PF09395_seed.sth") as handle:
    alignment = AlignIO.read(handle, "stockholm")

print("Alignment length %i" % alignment.get_alignment_length())

# One line per aligned record: its sequence followed by its ID.
for record in alignment:
    print(record.seq + " " + record.id)