Profilo di Doiar

Nome Doiar
Indirizzo email n/a
Messaggi1
  • Script per interrogazione database BLAST
    Forum >> Programmazione Python >> Files e Directory
    ciao a tutti
    ho trovato in un articolo uni script (blast_with_ncbi.py) che permette, fornendo un file di sequenze di geni in formato .fasta, l'interrogazione di un database locale.

    aprendo il file con gedit risulta chiaro che devo modificare alcune variabili, in particolare le directory in cui sono contenuti i vari file.

    tuttavia non riesco a farlo funzionare. credo che l'errore dipenda dal fatto che non so indicare in modo corretto la "workdir":
    # Main directory for work
    workdir = os.path.dirname(os.path.realpath(__file__))


    qualcuno potrebbe aiutarmi.
    riporto di seguito lo script originale e a seguire la parte modificata da me

    #
    # Imports
    import os
    import glob
    from   Bio import SeqIO
    from   Bio.Blast.Applications import NcbiblastnCommandline
     
    #
    # Defines
    # Change these for your specific needs
     
    # Main directory for work
    workdir   = os.path.dirname(os.path.realpath(__file__))
     
    # Local BLAST database and GI filter list
    blast_db  = '%s/../blast-db' % workdir
     
    # FASTA directory, where to find the sequences
    fasta_dir = '%s/../fasta' % workdir
     
    # Output of our BLAST results
    outdir    = '%s/../blast_results' % workdir
     
    #
    # Given a directory this returns a list of fasta files
    # Change if 'fa' is not the extension that you want to find.
    def get_fasta_files(fasta_dir):
        abs_path = os.path.abspath(fasta_dir)
        print('Looking for fasta files in: ',abs_path)
        return glob.glob('%s/*.fasta' % abs_path)
     
    #
    # Returns the output file
    def outfile(dir,file):
        return '%s/%s.csv' % (dir,os.path.splitext(os.path.basename(file))0)
     
    #
    # BLASTS the sequence file against the local database
    def ncbi_blast(in_file,out_file,dbp):
        cur_dir = os.getcwd()
        os.chdir(dbp)
        # Replace the database names with your own local databases
        # ...plus we are using 8 threads so change according to the resources available
        cmd_line = NcbiblastnCommandline(query=in_file, db="'nt_ncbi_plants fpuk'", out=out_file, outfmt="'10 std score stitle'", max_target_seqs=20,num_threads=8)
        cmd_line()
        os.chdir(cur_dir)
     
    #
    # Each fasta file contains a set of sequences that were
    # matched for a given tag. They are either reverse or forward.
    # We blast them to the database.
    def blast_sequences( fastas, odir, db_dir ):
        for file in fastas:
            print( 'Processing: ', file )
            ncbi_blast(file,outfile(odir,file),db_dir)            
     
    # Main
    print('Running')
    os.chdir(workdir)
    fasta_files = get_fasta_files(fasta_dir)
    blast_sequences(fasta_files,outdir,blast_db)
    print('Done')
    
    _______________________

    #
    # Imports
    import os
    import glob
    from   Bio import SeqIO
    from   Bio.Blast.Applications import NcbiblastnCommandline
     
    #
    # Defines
    # Change these for your specific needs
     
    # Main directory for work workdir   = os.path.dirname(os.path.realpath(__file__))
    workdir   = '/home/doiar/Scrivania/home/doiar/Scrivania/pyscript/BLAST-Tools' ('/home/doiar/Scrivania/home/doiar/Scrivania/pyscript/BLAST-Tools/bin' '%(blast_with_ncbi.py)%')
    
     
    # Local BLAST database and GI filter list
    blast_db  = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/blast-db/' % workdir
    
     
    # FASTA directory, where to find the sequences
    fasta_dir = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/fasta' % workdir
    
     
    # Output of our BLAST results
    outdir = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/blast_results' % workdir
    
     
    #
    # Given a directory this returns a list of fasta files
    # Change if 'fa' is not the extension that you want to find.
    def get_fasta_files(fasta_dir): 
        abs_path = os.path.abspath(fasta_dir)
        print('Looking for fasta files in: ',abs_path)
        return glob.glob('%s/*.fasta' % abs_path)
     
    #
    # Returns the output file
    def outfile(dir,file):
        return '%s/%s.csv' % (dir,os.path.splitext(os.path.basename(file))0)
     
    #
    # BLASTS the sequence file against the local database
    def ncbi_blast(in_file,out_file,dbp):
        cur_dir = os.getcwd()
        os.chdir(dbp)
        # Replace the database names with your own local databases
        # ...plus we are using 8 threads so change according to the resources available
        cmd_line = NcbiblastnCommandline(query=in_file, db="'ITSNCBIDB'", out=out_file, outfmt="'10 std score stitle'", max_target_seqs=10,num_threads=8)
        cmd_line()
        os.chdir(cur_dir)
     
    #
    # Each fasta file contains a set of sequences that were
    # matched for a given tag. They are either reverse or forward.
    # We blast them to the database.
    def blast_sequences( fastas, odir, db_dir ):
        for file in fastas:
            print( 'Processing: ', file )
            ncbi_blast(file,outfile(odir,file),db_dir)            
     
    # Main
    print('Running')
    os.chdir(workdir)
    fasta_files = get_fasta_files(fasta_dir)
    blast_sequences(fasta_files,outdir,blast_db)
    print('Done')