o
    NrfR                     @   s4  d dl Zd dlZd dlm  mZ d dlm  mZ	 d dl
Z
d dlZd dlZd dlZd dlZeeZi ai Zddded< ddded< ddded< ddded< dZd	d
 Zdd Z		d)ddZdd Zdd Zdd Z			d*ddZdd Zdd Z dd Z!dd  Z"d!d" Z#d#d$ Z$d%d& Zd'd( Z%dS )+    NICEweightnamevalueKRVCVC_SQRT   c                 C   s`   zt ||kd d }W n ty   | jd  Y S w || }|||  }| |||| fS )zGet bin ID from absolute coordinates.

    Args:
        c (Cooler): Cooler instance of a .cool file.
        abs_pos (int): Absolute coordinate to be translated.

    Returns:
        int: Bin number.
    r      Znbins)npZflatnonzero
IndexErrorinfooffset)cZabs_poschromschrom_cum_lengthschrom_sizesZchr_idchromZrel_pos r   ]/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/clodius/tiles/cooler.pyabs_coord_2_bin   s   r   c                 C   s2   | j }t| j}tjdt| jjf }|||fS )z
    Get the chromosome names and cumulative lengths:

    Args:

    c (Cooler): A cooler file

    Return:

    (names, sizes, lengths) -> (list(string), dict, np.array(int))
    r   )Z
chromnamesdict
chromsizesr   Zr_Zcumsumvalues)r   Zchrom_namesr   r   r   r   r   "get_chromosome_names_cumul_lengths/   s   

r   defaultc                 C   s  t | }t|\}}	}
t||||
|	}t||||
|	}t||||
|	}t||||
|	}|jddd}||jd ksA||jd krad\}}}}tjg ddtg g g d	tg g g d	ffS t||jd d }t||jd d }|||d ||d f }	 g d
}|dkrd|	 v s|dkr|
d n	|dv r|
| |j	dd| }t ||}|
|d  |d  |d< |
|d  |d  |d< |||d  }|||d  }|
|d  |d  |d< |
|d  |d  |d< |
|d  |d  |d< |
|d  |d  |d< |dkrd|	 v s|dkr4|d |d  |d  |d< |g d ||ffS |dv ra|d ||d   ||d   |d< || |d< || |d< |g d ||ffS |g d  d!fS )"a>  Get balanced pixel data.

    Args:
        f: h5py.File
            An HDF5 Group that contains the cooler for this resolution
        start_pos_1 (int): Test.
        end_pos_1 (int): Test.
        start_pos_2 (int): Test.
        end_pos_2 (int): Test.

    Returns:
        DataFrame: Annotated cooler pixels.
    TF)Z	as_pixelsZbalancer   r   )r   r   r   r   )genome_start1genome_start2balanced)columns)genome_start
genome_endr   )r   startendr   r   )r   r   r	   )Zconvert_enumZchrom1start1r   Zchrom2start2r   r   r#   r!   r$   r"   countZweight1Zweight2r   12)r   r   r'   )NN)coolerCoolerr   r   matrixshapepdZ	DataFrameminbinsappendZannotate)fZstart_pos_1Z	end_pos_1Zstart_pos_2Z	end_pos_2Z	transform
resolutionr   r   r   r   Zi0i1Zj0Zj1r,   Zpixelscolsr0   bins1bins2r   r   r   get_dataA   sV   
	
"
r8   c                 C   sH  t | d}|jd}|du rtd tdt|d }t	|\}}}t
|d }|jd }t
|t| jd }|t d	|  }	i }
t|D ]6}|t| d
 }d|v raddd|
d< d|v rlddd|
d< d|v rwddd|
d< d|v rddd|
d< qLddg||g||	t|
 d}W d   |S 1 sw   Y  |S )zGet information of a cooler file.

    Args:
        file_path (str): Path to a cooler file.

    Returns:
        dict: Dictionary containing basic information about the cooler file.
    rzmax-zoomNzno zoom foundz$The `max_zoom` attribute is missing.0zbin-size   r0   r   r   r   r   r   r	   g        )min_posmax_posmax_zoom	max_widthZbins_per_dimension
transforms)h5pyFileattrsgetloggerr   
ValueErrorr*   r+   r   intstr	TILE_SIZEranger   )	file_pathr2   r?   r   r   r   r   total_lengthZbin_sizer@   rA   i
f_for_zoomr   r   r   r   _get_info_multi_v1   sD   



))rP   c                    s&    fddt t|  t d D S )Nc                    s   g | ]} d |  qS )r<   r   .0xbase_resr   r   
<listcomp>   s    
z(get_zoom_resolutions.<locals>.<listcomp>r   )rK   hgutZget_quadtree_depthrJ   )r   rU   r   rT   r   get_zoom_resolutions   s   
rX   c                 C   s6   t jj| dd}t||}tddd |D  dS )ze
    Print comma-separated list of zoom resolutions for a given genome
    and base resolution.

    T)Z	all_names,c                 s   s    | ]}t |V  qd S )N)rI   )rR   resr   r   r   	<genexpr>   s    z)print_zoom_resolutions.<locals>.<genexpr>N)r*   utilZread_chromsizesrX   printjoin)Zchromsizes_filerU   r   resolutionsr   r   r   print_zoom_resolutions   s   
r`   r   c           &   	   C   s  d}|| }|| }	|| | }
|| }|| | }t | }t|\}}}t| }t| |	|
d ||d ||d\}\}}i }td|D ]<}td|D ]3}|| | }	|| d | }
|| | }|| d | }||d |	k }||d |
k  }||d |k }||d |k  }|}|d j|	 | t}|d j| | t}d|v rt	
|d j}nt	
|d j}t	jd	t	jd
}||||f< |durv|durv||d |	k }||d |k }||d |
k  }||d |k  }|t	|d  } |t	|d  }!| d j|	 | t}"|!d j| | t}#t	t|t|
t||	 | t}$t	t|t|t|| | t}%|$|$dk }$|%|%dk }%t	j|dd|"f< t	j||#ddf< t	j|dd|$f< t	j||%ddf< | ||| || f< qNqF|S )a  
    Generate tiles for a given location. This function retrieves tiles for
    a rectangular region of width x_width and height y_width

    Parameters
    ---------
    hdf_for_resolution: h5py.File
        An HDF group containing the cooler for the given resolution
    x_pos: int
        The starting x position
    y_pos: int
        The starting y position
    cooler_file: string
        The filename of the cooler file to get the data from
    x_width: int
        The number of tiles to retrieve along the x dimension
    y_width: int
        The number of tiles to retrieve along the y dimension

    Returns
    -------
    data_by_tilepos: {(x_pos, y_pos) : np.array}
        A dictionary of tile data indexed by tile positions
    r
   r   )r3   r   r   r   r   r'   )r
   r
   )ZdtypeNr!   r   )r*   r+   r   sumr   r8   rK   ZastyperH   r   Z
nan_to_numZzerosZfloat32isnanarraynanZravel)&hdf_for_resolutionr3   Zx_posZy_postransform_typeZx_widthZy_widthBINS_PER_TILEZ	tile_sizer%   Zend1r&   Zend2r   r   r   r   rM   datar6   r7   Zdata_by_tileposZx_offsetZy_offsetdfZbinsizejrN   voutZ	sub_bins1Z	sub_bins2Z	nan_bins1Z	nan_bins2ZbiZbjZbend1Zbend2r   r   r   
make_tiles   s   !
Irm   c                 C   sZ   t t}| D ]#}|d}ttt|dd }|d }t|}|||f | q|S )ap  
    Place these tiles into separate lists according to their
    zoom level and transform type

    Parameters
    ----------
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0.1) identifying the tiles
        to be retrieved

    Returns
    -------
    tile_lists: {(zoomLevel, transformType): [tile_id, tile_id]}
        A dictionary of tile ids
    .r      r   )	coldefaultdictsetsplitlistmaprH   get_transform_typeadd)tile_idsZtile_id_liststile_idtile_id_partstile_position
zoom_leveltransform_methodr   r   r   %bin_tiles_by_zoom_level_and_transform  s   

r~   c                 C   s*   |  d}t|dkr|d }|S d}|S )z
    Get the transform type specified in the tile id.
    Parameters
    ----------
    cooler_tile_id: str
        A tile id for a 2D tile (cooler)
    Returns
    -------
    transform_type: str
        The transform type requested for this tile
    rn   ro   r   )rs   len)ry   rz   r}   r   r   r   rv     s   
rv   c                 C   sZ   t  }| d }d|v r|d d|v r|d d|v r"|d d|v r+|d |S )a  
    Get the available resolutions from a single cooler file.
    Parameters
    ----------
    cooler: h5py File
        A cooler file containing binned 2D data
    Returns
    -------
    transforms: dict
        A list of transforms available for this dataset
    r0   r   r   r   r	   )rr   rw   )r*   rA   rO   r   r   r   get_available_transforms  s   



r   c                 C   s  t | d}d|v rdttttt|d  i}||gt| < i }|d D ]}t	|d t
| ||< q'tj|  }dd |D |d< t|d  d }tt|d | d d }||g|d	< d
d
g|d< t|d | }dd |j D |d< d|jv r|jd dkrd|d< ||fS t| }t|d }dd |j D |d< dd |d D |d< dd |d	 D |d	< t|d |d< t|d |d< d|v rt|d |d< d|jv r|jd sd|d< d|jv r|jd dkrd|d< ||gt| < ||fS )zJ
    Create the file handle and tileset info for a cooler
    tileset
    r9   r_   c                 S   s   g | ]}t | qS r   )transform_descriptionsrR   tr   r   r   rV     s    zmake_mats.<locals>.<listcomp>rA   r   r   lengthr>   r   r=   c                 S       g | ]}|d  t |d gqS r   r   rH   rQ   r   r   r   rV          r   zstorage-modeZsquarefalseZmirror_tilesr:   c                 S   r   r   r   rQ   r   r   r   rV      r   c                 S      g | ]}t |qS r   r   rR   mr   r   r   rV         c                 S   r   r   r   r   r   r   r   rV     r   r?   r@   Z	symmetric)rB   rC   tuplesortedru   rH   rt   keysmatsr   rI   rr   intersectionr   ra   r*   r+   r   itemsr   rP   )filepathr2   r   Z#available_transforms_per_resolutionr3   Zall_available_transformsZgenome_lengthr   r   r   r   	make_mats  sN   "

r   c                 C   s$   | t v r
t |  d S t| \}}|S )z
    Get the tileset info for a cooler file

    Parameters:
    -----------

    filepath: str
        The location of the cooler file
    r   )r   r   )r   r2   r   r   r   r   tileset_info  s   
r   c                 C   s@   |  d}|d }|dd }t| }d|g| |g }|S )a  
    Add a transform type to a cooler tile id if it's not already
    present.

    Parameters
    ----------
    tile_id: str
        A tile id (e.g. xyz.0.1.0)

    Returns
    -------
    new_tile_id: str
        A formatted tile id, potentially with an added transform_type
    rn   r   r   ro   )rs   rv   r^   )ry   rz   Ztileset_uuidr{   rf   new_tile_idr   r   r   add_transform_type&  s   
r   c           
      C   sj   i }g }|D ]}t |}|||< ||g7 }qt| |}g }|D ]\}}||v r2|| }	||	|fg7 }q|S ) )r   generate_tiles)
r   rx   Ztransform_id_to_original_idZnew_tile_idsry   r   generated_tilesZtiles_to_returnZ
tile_valueZoriginal_tile_idr   r   r   tiles>  s   
r   c              
      s  d}| t vr
t|  t |  }t| }ttjdd |D  }g }|D ]}t|d dd |d dd  t	|d |d |d }dv rmt
dd d D d	d
}	t|	kr`q$|	 }
|d t|
 }nd krtq$|t }d d  | }
dd |D }ttfdd|}ttfdd|}t|dkrq$tdd |D }tdd |D }tdd |D }tdd |D }t||
|||| d || d } fdd| D }||7 }q$|S )a  
    Generate tiles from a cooler file.
    Parameters
    ----------
    tileset: tilesets.models.Tileset object
        The tileset that the tile ids should be retrieved from
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0.1) identifying the tiles
        to be retrieved
    Returns
    -------
    generated_tiles: [(tile_id, tile_data),...]
        A list of tile_id, tile_data tuples
    r
   c                 S   s   g | ]}t |qS r   )rW   Zpartition_by_adjacent_tilesr   r   r   r   rV   q  s    z"generate_tiles.<locals>.<listcomp>r   rn   r   r_   c                 S   r   r   r   )rR   r9   r   r   r   rV     r   T)reverser?   r@   r<   c                 S   s(   g | ]}d d | ddd D qS )c                 S   r   r   r   rQ   r   r   r   rV     r   z-generate_tiles.<locals>.<listcomp>.<listcomp>rn   r<   ro   )rs   r   r   r   r   rV     s   ( c                    s   | d  d d d k S )Nr   r>   r   r   rS   r   r   r   <lambda>      z generate_tiles.<locals>.<lambda>c                    s   | d  d d d k S )Nr   r>   r   r   r   r   r   r     r   c                 S      g | ]}|d  qS r   r   r   r   r   r   rV     r   c                 S   r   r   r   r   r   r   r   rV     r   c                 S   r   r   r   r   r   r   r   rV     r   c                 S   r   r   r   r   r   r   r   rV     r   c              	      s@   g | ]\}}d  tt gg t| g t|fqS )rn   )r^   ru   rI   rt   hgfoZformat_dense_tile)rR   positionZ	tile_data)
tileset_idrf   r|   r   r   rV     s    
)r   r   r~   r   rt   itchainrH   rs   rv   r   r   rI   filterr/   maxrm   r   )r   rx   rg   Ztileset_file_and_infoZtile_ids_by_zoom_and_transformZpartitioned_tile_idsr   Z
tile_groupZtileset_fileZsorted_resolutionsr3   re   Ztile_positionsZminxZmaxxZminyZmaxyZtile_data_by_positionr   r   )r   r   rf   r|   r   r   U  s|   	




r   )r   N)r   r   r   )&collectionsrp   r*   Zclodius.tiles.formatr   formatr   Zclodius.tiles.utilsutilsrW   rB   	itertoolsr   numpyr   Zpandasr.   logging	getLogger__name__rF   r   r   rJ   r   r   r8   rP   rX   r`   rm   r~   rv   r   r   r   r   r   r   r   r   r   <module>   sL    

c6
 C