
    DUf                     ^    d dl Z d dlZd dlZd dlZd dlZd dlZd Zd Z G d de          Z	dS )    Nc                  (    t          j                     S )N)time     c/var/www/html/software/conda/lib/python3.11/site-packages/pybedtools/contrib/intersection_matrix.pynowr   	   s    9;;r   c                     t           j                            t           j                            |                     d         S )Nr   )ospathsplitextbasename)fnames    r   get_namer      s-    7BG,,U3344Q77r   c                   H    e Zd ZdZddZddZd Zd Zd Zd	 Z	dd
Z
d ZdS )IntersectionMatrixzE
    Class to handle many pairwise comparisons of interval files
    NFc                    || _         || _        || _        || _        | j        rd|                     |           t          j        |          | _        t
          j        | j        _	        | j        
                                | _        dS dS )a  
        Class to handle and keep track of many pairwise comparisons of interval
        files.

        A lightweight database approach is used to minimize computational time.

        The database stores filenames and calculation timestamps;
        re-calculating a matrix using the same interval files will only
        re-calculate values for those files whose modification times are newer
        than the timestamp in the database.

        `beds` is a list of bed files.

        `genome` is the string assembly name, e.g., "hg19" or "dm3".

        `dbfn` is the filename of the database you'd like to use to track
        what's been completed.

        Example usage:

        First, get a list of bed files to use:
        #>>> beds = [
        #... pybedtools.example_filename(i) for i in  [
        #... 'Cp190_Kc_Bushey_2009.bed',
        #... 'CTCF_Kc_Bushey_2009.bed',
        #... 'SuHw_Kc_Bushey_2009.bed',
        #... 'BEAF_Kc_Bushey_2009.bed'
        #... ]]

        Set some parameters.  "dm3" is the genome to use; info will be stored
        in "ex.db".  `force=True` means to overwrite what's in the database
        #>>> # In practice, you'll want many more iterations...
        #>>> im = IntersectionMatrix(beds, 'dm3',
        #...            dbfn='ex.db', iterations=3, force=True)
        #>>> # Use 4 CPUs for randomization
        #>>> matrix = im.create_matrix(verbose=True, processes=4)
        N)bedsgenomedbfn
iterations_init_dbsqlite3connectconnRowrow_factorycursorc)selfr   r   r   r   forces         r   __init__zIntersectionMatrix.__init__   s}    L 		$9 	(MM%   --DI$+KDI!Y%%''DFFF		( 	(r   c                 B   | j         dS t          j                            | j                   r|sdS t	          j        | j                   }|                                }|r|                    d           |                    d           |	                                 dS )zB
        Prepare the database if it doesn't already exist
        Nz#DROP TABLE IF EXISTS intersections;a  
        CREATE TABLE intersections (
            filea TEXT,
            fileb TEXT,
            timestamp FLOAT,
            actual FLOAT,
            median FLOAT,
            iterations INT,
            self INT,
            other INT,
            fractionabove FLOAT,
            fractionbelow FLOAT,
            percentile FLOAT,
            PRIMARY KEY (filea, fileb, iterations));
        )
r   r
   r   existsr   r   r   executeexecutescriptcommit)r   r    r   r   s       r   r   zIntersectionMatrix._init_dbG   s     9F7>>$)$$ 	U 	Fty))KKMM 	=II;<<<		
 	
 	
" 	r   c                     | j         dS t          | j                            dt	                                          }t          |          dk    rdS t          |          dk    sJ |d         S )z
        Return the sqlite3.Row from the database corresponding to files `fa`
        and `fb`; returns None if not found.
        Nz
                SELECT * FROM intersections
                WHERE
                filea=:fa AND fileb=:fb AND iterations=:iterations
                r      )r   listr   r$   localslen)r   fafbr   resultss        r   get_rowzIntersectionMatrix.get_rowf   sz    
 9FFNN
  	
 	
 w<<1F7||q    qzr   c                     |                      |||          }|rXt          j                            |          }t          j                            |          }|d         |k    r|d         |k    rdS dS )z
        Retrieves row from db and only returns True if there's something in
        there and the timestamp is newer than the input files.
        	timestampTF)r/   r
   r   getmtime)r   r,   r-   r   rowtfatfbs          r   donezIntersectionMatrix.done}   st    
 ll2r:.. 	'""2&&C'""2&&CK 3&&S-=-C-Ctur   c                     t          j        |                              | j                  }| j        |d<    |j        |fi |}|                     |           d S )Nr   )
pybedtoolsBedToolset_chromsizesr   r   randomstatsadd_row)r   r,   r-   kwargsar.   s         r   run_and_insertz!IntersectionMatrix.run_and_insert   s_    r""11$+>>#|!---f--Wr   c                     g d}|D ]\  }}||         ||<   t                      |d<   d}| j                            ||           | j                                         dS )a  
        Inserts data into db.  `results` is a dictionary as returned by
        BedTool.randomstats with keys like::

            'iterations'
            'actual'
            'file_a'
            'file_b'
            self.fn
            other.fn
            'self'
            'other'
            'frac randomized above actual'
            'frac randomized below actual'
            'median randomized'
            'normalized'
            'percentile'
            'lower_%sth' % lower_thresh
            'upper_%sth' % upper_thresh
        ))file_afilea)file_bfileb)zmedian randomizedmedian)zfrac randomized above actualfractionabove)zfrac randomized below actualfractionbelowr1   a<  
        INSERT OR REPLACE INTO intersections (

            filea,
            fileb,
            timestamp,
            actual,
            median,
            iterations,
            self,
            other,
            fractionabove,
            fractionbelow,
            percentile)

            VALUES (

            :filea,
            :fileb,
            :timestamp,
            :actual,
            :median,
            :iterations,
            :self,
            :other,
            :fractionabove,
            :fractionbelow,
            :percentile)

        N)r   r   r$   r   r&   )r   r.   translationsorignewsqls         r   r<   zIntersectionMatrix.add_row   s~    ,
 
 
 & 	) 	)ID#"4=GCLL"uu< 	sG$$$	r   c                    t          | j                  }|dz  }d}t          j        t                    }| j        D ]}| j        D ]}|dz  }|rLt
          j                            dt                      z             t
          j        	                                 | 
                    ||| j                  s | j        ||fi | |                     ||| j                  |t          |                   t          |          <   |S )a  
        Matrix (implemented as a dictionary), where the final values are
        sqlite3.ROW objects from the database::

            {
                filea: {
                            filea: ROW,
                            fileb: ROW,
                            ...},
                fileb: {
                            filea: ROW,
                            fileb: ROW,
                            ...},

                        }
            }
           r   r(   z$%(i)s of %(total)s: %(fa)s + %(fb)s
)r+   r   collectionsdefaultdictdictsysstderrwriter*   flushr6   r   r?   r/   r   )	r   verboser=   nfilestotalimatrixr,   r-   s	            r   create_matrixz IntersectionMatrix.create_matrix   s   $ TY!(..) 	 	Bi  Q 'J$$%Lvxx%WXXXJ$$&&&yyR99 :'D'B99&99959\\DO6 6x||$Xb\\22 r   c                     dS )a:  
        Prints a pairwise matrix of values. `matrix` is a dict-of-dicts from
        create_matrix(), and `key` is a field name from the database -- one of:

        ['filea', 'fileb', 'timestamp', 'actual', 'median', 'iterations',
        'self', 'other', 'fractionabove', 'fractionbelow', 'percentile']
        Nr   )r   rY   keys      r   print_matrixzIntersectionMatrix.print_matrix   s      r   )NF)F)__name__
__module____qualname____doc__r!   r   r/   r6   r?   r<   rZ   r]   r   r   r   r   r      s         /( /( /( /(b   >  .    A A AF% % % %N    r   r   )
r
   rQ   r   r8   r   rN   r   r   objectr   r   r   r   <module>rc      s    				 



            8 8 8p p p p p p p p p pr   