
    tfR                     8   d dl Zd dlZd dlmc mZ d dlmc mZ	 d dl
Z
d dlZd dlZd dlZd dlZ ej$                  e      Zi ai Zddded<   ddded<   ddded<   ddded<   dZd	 Zd
 Z	 	 ddZd Zd Zd Z	 	 	 ddZd Zd Z d Z!d Z"d Z#d Z$d Zd Z%y)    NICEweightnamevalueKRVCVC_SQRT   c                     	 t        j                  ||kD        d   dz
  }||   }|||   z
  }| j	                  ||||   f      S # t        $ r | j                  d   cY S w xY w)zGet bin ID from absolute coordinates.

    Args:
        c (Cooler): Cooler instance of a .cool file.
        abs_pos (int): Absolute coordinate to be translated.

    Returns:
        int: Bin number.
    r      nbins)npflatnonzero
IndexErrorinfooffset)cabs_poschromschrom_cum_lengthschrom_sizeschr_idchromrel_poss           ^/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/clodius/tiles/cooler.pyabs_coord_2_binr      sz     1G ;<Q?!C 6NE)&11G88UG[%7899  vvgs   A AAc                     | j                   }t        | j                        }t        j                  dt        j
                  | j                  j                        f   }|||fS )z
    Get the chromosome names and cumulative lengths:

    Args:

    c (Cooler): A cooler file

    Return:

    (names, sizes, lengths) -> (list(string), dict, np.array(int))
    r   )
chromnamesdict
chromsizesr   r_cumsumvalues)r   chrom_namesr   r   s       r   "get_chromosome_names_cumul_lengthsr&   /   sP     ,,Kq||$Ka1<<+>+>!??@%666    c                 0   t        j                  |       }t        |      \  }}	}
t        ||||
|	      }t        ||||
|	      }t        ||||
|	      }t        ||||
|	      }|j	                  dd      }||j
                  d   k\  s||j
                  d   k\  rQd\  }}}}t        j                  g d      t        j                  g g g d	      t        j                  g g g d	      ffS t        ||j
                  d   dz
        }t        ||j
                  d   dz
        }|||dz   ||dz   f   }	 g d
}|dk(  rd|j                         v s|dk(  r|j                  d       n|dv r|j                  |       |j                  d      |   }t        j                  ||      }|
|d      |d   z   |d<   |
|d      |d   z   |d<   |||dz    }|||dz    }|
|d      |d   z   |d<   |
|d      |d   z   |d<   |
|d      |d   z   |d<   |
|d      |d   z   |d<   |dk(  rd|j                         v s|dk(  r|d   |d   z  |d   z  |d<   |g d   ||ffS |dv r5|d   ||dz      z  ||dz      z  |d<   ||   |d<   ||   |d<   |g d   ||ffS |g d    d!fS )"a>  Get balanced pixel data.

    Args:
        f: h5py.File
            An HDF5 Group that contains the cooler for this resolution
        start_pos_1 (int): Test.
        end_pos_1 (int): Test.
        start_pos_2 (int): Test.
        end_pos_2 (int): Test.

    Returns:
        DataFrame: Annotated cooler pixels.
    TF)	as_pixelsbalancer   r   )r   r   r   r   )genome_start1genome_start2balanced)columns)genome_start
genome_endr   )r   startenddefaultr   )r   r	   r
   )convert_enumchrom1start1r+   chrom2start2r,   r   r1   r/   r2   r0   countweight1weight2r-   12)r+   r,   r9   )NN)coolerCoolerr&   r   matrixshapepd	DataFrameminbinsappendannotate)fstart_pos_1	end_pos_1start_pos_2	end_pos_2	transform
resolutionr   r   r   r   i0i1j0j1r@   pixelscolsrE   bins1bins2s                        r   get_datarW   A   sS   . 	aA/QRS/T,V[+	K1BK	PB	Iv/@+	NB	K1BK	PB	Iv/@+	NBXXeX4F	V\\!_fll1o 5 $BB LL!OPbbQRbbQR
 	
 V\\!_q()V\\!_q()BaKb1f,-F %DY8qvvx#7I<QH	-	-I66u6%d+D__VT*F/x0@AF8DTTF?/x0@AF8DTTF?b1fEb1fE-eGn=gNE.-eGn=gNE.+E'N;eElJE,+E'N;eElJE, 	Y8qvvx#7I<Q#G_vi/@@6)CTTzEFPUWW	-	-7OfY_55y38OO 	z  	*h	*hEFPUWWBC\RRr'   c                    t        j                  | d      5 }|j                  j                  d      }| t        j                  d       t        d      t        j                  |d         }t        |      \  }}}t        |d         }|j                  d   }t        |t        |         j                  d         }|t        z  d	|z  z  }	i }
t        |      D ]D  }|t        |         d
   }d|v rddd|
d<   d|v rddd|
d<   d|v rddd|
d<   d|v s=ddd|
d<   F ddg||g||	t        |
j                         d}ddd       |S # 1 sw Y   S xY w)zGet information of a cooler file.

    Args:
        file_path (str): Path to a cooler file.

    Returns:
        dict: Dictionary containing basic information about the cooler file.
    rzmax-zoomNzno zoom foundz$The `max_zoom` attribute is missing.0zbin-size   rE   r   r   r   r   r	   r
   g        )min_posmax_posmax_zoom	max_widthbins_per_dimension
transforms)h5pyFileattrsgetloggerr   
ValueErrorr>   r?   r&   intstr	TILE_SIZEranger$   )	file_pathrH   r_   r   r   r   r   total_lengthbin_sizer`   rb   i
f_for_zoomr   s                 r   _get_info_multi_v1rr      s    
9c	" '
a77;;z*KK(CDDMM!C&!3UVW3X0/,R0177:&qX'--j9:y(1=8	 
x 
	PA3q66*J:%05'I
8$z!,04#@
4 z!,04#@
4 J&1:Y(O
9%
	P Sz$l3 ""+$++-
A'
R KS'
R Ks   C>E	)E		Ec                     t        t        j                  | |t        z        dz         D cg c]
  }|d|z  z   c}S c c}w )Nr   r\   )rl   hgutget_quadtree_depthrk   )r!   base_resxs      r   get_zoom_resolutionsrx      sJ     t..z8i;OPSTTU 	16  s   >c                     t         j                  j                  | d      }t        ||      }t	        dj                  d |D                     y)ze
    Print comma-separated list of zoom resolutions for a given genome
    and base resolution.

    T)	all_names,c              3   2   K   | ]  }t        |        y w)N)rj   ).0ress     r   	<genexpr>z)print_zoom_resolutions.<locals>.<genexpr>   s     33s83s   N)r>   utilread_chromsizesrx   printjoin)chromsizes_filerv   r!   resolutionss       r   print_zoom_resolutionsr      s@     ,,_,MJ&z8<K	#((3{3
34r'   c                 *   d}||z  }||z  }	||z   |z  }
||z  }||z   |z  }t        j                  |       }t        |      \  }}}t        |j	                               }t        | |	|
dz
  ||dz
  ||      \  }\  }}i }t        d|      D ]  }t        d|      D ]  }||z   |z  }	||z   dz   |z  }
||z   |z  }||z   dz   |z  }||d   |	k\     }||d   |
k     }||d   |k\     }||d   |k     }|}|d   j                  |	z
  |z  j                  t              }|d   j                  |z
  |z  j                  t              }d|v r#t        j                  |d   j                        }n"t        j                  |d   j                        }t        j                  d	t        j                  
      }||||f<   ||||d   |	k\     }||d   |k\     }||d   |
k     }||d   |k     }|t        j                  |d            } |t        j                  |d            }!| d   j                  |	z
  |z  j                  t              }"|!d   j                  |z
  |z  j                  t              }#t        j                  t        |t        |
      t        |                  |	z
  |z  j                  t              }$t        j                  t        |t        |      t        |                  |z
  |z  j                  t              }%|$|$dk\     }$|%|%dk\     }%t        j                  |dd|"f<   t        j                  ||#ddf<   t        j                  |dd|$f<   t        j                  ||%ddf<   |j!                         |||z   ||z   f<     |S )a  
    Generate tiles for a given location. This function retrieves tiles for
    a rectangular region of width x_width and height y_width

    Parameters
    ---------
    hdf_for_resolution: h5py.File
        An HDF group containing the cooler for the given resolution
    x_pos: int
        The starting x position
    y_pos: int
        The starting y position
    cooler_file: string
        The filename of the cooler file to get the data from
    x_width: int
        The number of tiles to retrieve along the x dimension
    y_width: int
        The number of tiles to retrieve along the y dimension

    Returns
    -------
    data_by_tilepos: {(x_pos, y_pos) : np.array}
        A dictionary of tile data indexed by tile positions
    r   r   )rN   r   r+   r,   r-   r9   )r   r   )dtypeNr/   r   )r>   r?   r&   sumr$   rW   rl   astyperi   r   
nan_to_numzerosfloat32isnanarraynanravel)&hdf_for_resolutionrN   x_posy_postransform_typex_widthy_widthBINS_PER_TILE	tile_sizer6   end1r8   end2r   r   r   r   rn   datarU   rV   data_by_tileposx_offsety_offsetdfbinsizejrp   vout	sub_bins1	sub_bins2	nan_bins1	nan_bins2bibjbend1bend2s&                                         r   
make_tilesr      s
   B M]*IYFGOy(DYFGOy(D 	()A/QRS/T,V[+{))+,L%qqT>E5" O!W% HPa) G	PHh&)3FH$q(I5Dh&)3FH$q(I5D d?+v56BB'$./BB'612BB'$./B G_%,,v5'AII#NA_%,,v5'AII#NARMM"Z."7"78MM"W+"4"45((:RZZ8CC1I U%6!%"76"AB	!%"76"AB	%i&?$&FG	%i&?$&FG	 &bhhy/B&CD	%bhhy/B&CD	 077&@WLTT !077&@WLTT
 XXeL#d)S_MNQWW&+ 
 XXeL#d)S_MNQWW&+ 
 eqj)eqj)VVArE
VVBE
 "AuH "E1H EHIIKOUX-ux/?@AOG	PHPT r'   c           	          t        j                  t              }| D ]U  }|j                  d      }t	        t        t        |dd             }|d   }t        |      }|||f   j                  |       W |S )ap  
    Place these tiles into separate lists according to their
    zoom level and transform type

    Parameters
    ----------
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0.1) identifying the tiles
        to be retrieved

    Returns
    -------
    tile_lists: {(zoomLevel, transformType): [tile_id, tile_id]}
        A dictionary of tile ids
    .r      r   )	coldefaultdictsetsplitlistmapri   get_transform_typeadd)tile_idstile_id_liststile_idtile_id_partstile_position
zoom_leveltransform_methods          r   %bin_tiles_by_zoom_level_and_transformr     s      OOC(M Cc*SmAa&89:"1%
-g6z#34599'BC r'   c                 V    | j                  d      }t        |      dkD  r|d   }|S d}|S )z
    Get the transform type specified in the tile id.
    Parameters
    ----------
    cooler_tile_id: str
        A tile id for a 2D tile (cooler)
    Returns
    -------
    transform_type: str
        The transform type requested for this tile
    r   r   r3   )r   len)r   r   r   s      r   r   r     s@     MM#&M
=A(+  %r'   c                     t               }| d   }d|v r|j                  d       d|v r|j                  d       d|v r|j                  d       d|v r|j                  d       |S )a  
    Get the available resolutions from a single cooler file.
    Parameters
    ----------
    cooler: h5py File
        A cooler file containing binned 2D data
    Returns
    -------
    transforms: dict
        A list of transforms available for this dataset
    rE   r   r   r	   r
   )r   r   )r>   rb   rq   s      r   get_available_transformsr     sj     JJ:x ztztJy!r'   c                    t        j                  | d      }d|v rydt        t        t	        t
        t        |d   j                                                 i}||gt        | <   i }|d   D ]  }t        |d   t        |               ||<   ! t        j                  |j                          }|D cg c]  }t        |    c}|d<   t        |d   j                               d   }t        t        |d   |   d   d               }||g|d<   ddg|d	<   t!        j"                  |d   |         }|j$                  j'                         D 	cg c]  }	|	d   t        |	d         g c}	|d
<   d|j(                  v r|j(                  d   dk(  rd|d<   ||fS t+        |       }t!        j"                  |d         }|j$                  j'                         D 	cg c]  }	|	d   t        |	d         g c}	|d
<   |d	   D 
cg c]  }
t        |
       c}
|d	<   |d   D 
cg c]  }
t        |
       c}
|d<   t        |d         |d<   t        |d         |d<   d|v rt        |d         |d<   d|j(                  v r|j(                  d   sd|d<   d|j(                  v r|j(                  d   dk(  rd|d<   ||gt        | <   ||fS c c}w c c}	w c c}	w c c}
w c c}
w )zJ
    Create the file handle and tileset info for a cooler
    tileset
    rY   r   rb   r   r   lengthr^   r   r]   r!   zstorage-modesquarefalsemirror_tilesrZ   r_   r`   	symmetric)rc   rd   tuplesortedr   ri   r   keysmatsr   rj   r   intersectionr$   transform_descriptionsr   r>   r?   r!   itemsr   rr   )filepathrH   r   #available_transforms_per_resolutionrN   all_available_transformstgenome_lengthr   rw   ms              r   	make_matsr     s   
 			(C A uVCT!M:J:O:O:Q5R,S%TUVTX
 /1+}- 	J>V- Z1?/
;	
 $'#3#30779$
 
 0H
*+"1%
\
 !M*//1215
C- 0 <X Fx PQR(-8Ya&YMM!M*:679:9K9K9MNAqtS1Y/N\QVV#~(>((J#*D . d7N+ "(+MM!C&!9:9K9K9MNAqtS1Y/N\+/	?;a3q6;Y+/	?;a3q6;YtJ/0Z[ 12[4!%d<&8!9D !&& )<#*D QVV#~(>((J#*D TXd7NK
 O O;;s   2J?KK	;KKc                 J    | t         v rt         |    d   S t        |       \  }}|S )z
    Get the tileset info for a cooler file

    Parameters:
    -----------

    filepath: str
        The location of the cooler file
    r   )r   r   )r   rH   r   s      r   tileset_infor     s.     4H~a  h'	Dr'   c                     | j                  d      }|d   }|dd }t        |       }dj                  |g|z   |gz         }|S )a  
    Add a transform type to a cooler tile id if it's not already
    present.

    Parameters
    ----------
    tile_id: str
        A tile id (e.g. xyz.0.1.0)

    Returns
    -------
    new_tile_id: str
        A formatted tile id, potentially with an added transform_type
    r   r   r   r   )r   r   r   )r   r   tileset_uuidr   r   new_tile_ids         r   add_transform_typer   &  sU     MM#&M #L!!A&M'0N((L>M9^<LLMKr'   c                     i }g }|D ]  }t        |      }|||<   ||gz  } t        | |      }g }|D ]  \  }}||v s||   }	||	|fgz  } |S ) )r   generate_tiles)
r   r   transform_id_to_original_idnew_tile_idsr   r   generated_tilestiles_to_return
tile_valueoriginal_tile_ids
             r   tilesr   >  s    "$L &(13:#K0%&
 %X|<OO. @11:7C!1: >??O	@ r'   c                    d}| t         vrt        |        t         |    }t        |      j                         }t	        t        j                  |D cg c]  }t        j                  |       c}       }g }|D ]=  }t        |d   j                  d      d         }	|d   j                  d      d   }
t        |d         }|d   |d   }dv rMt        d   D cg c]  }t        |       c}d      }|	t        |      kD  r||	   }|d   t        |         }n%|	d   kD  r|t        |	         }d	   d
|	z  z  |z  }|D cg c]-  }|j                  d      d
d D cg c]  }t        |       c}/ }}}t	        t        fd|            }t	        t        fd|            }t        |      dk(  rCt!        |D cg c]  }|d   	 c}      }t#        |D cg c]  }|d   	 c}      }t!        |D cg c]  }|d   	 c}      }t#        |D cg c]  }|d   	 c}      }t%        |||||||z
  dz   ||z
  dz         }|j'                         D cg c]N  \  }}dj)                  t+        t        |
g|	gz   t	        |      z   |gz               t-        j.                  |      fP }}}||z  }@ |S c c}w c c}w c c}w c c}}w c c}w c c}w c c}w c c}w c c}}w )a  
    Generate tiles from a cooler file.
    Parameters
    ----------
    tileset: tilesets.models.Tileset object
        The tileset that the tile ids should be retrieved from
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0.1) identifying the tiles
        to be retrieved
    Returns
    -------
    generated_tiles: [(tile_id, tile_data),...]
        A list of tile_id, tile_data tuples
    r   r   r   r   r   T)reverser_   r`   r\   r   c                 &    | d   d   d   dz   k  S )Nr   r^   r    rw   r   s    r   <lambda>z generate_tiles.<locals>.<lambda>      QqTL$;A$>$BB r'   c                 &    | d   d   d   dz   k  S )Nr   r^   r   r   s    r   r   z generate_tiles.<locals>.<lambda>  r   r'   )r   r   r   r$   r   itchainrt   partition_by_adjacent_tilesri   r   r   r   r   rj   filterrD   maxr   r   r   r   hgfoformat_dense_tile)r   r   r   tileset_file_and_infotile_ids_by_zoom_and_transformr   partitioned_tile_idsr   
tile_groupr   
tileset_idr   tileset_filerY   sorted_resolutionsrN   r   rw   tile_positionsminxmaxxminymaxytile_data_by_positionposition	tile_datar   r   s                              @r   r   r   U  s1    Mt( N%J&fh #  
 8 003	
 O* C!
A,,S1!45
]((-a0
+JqM:,Q/,Q/L(!'!-m!<=AQ=t" C 233+J7J!-m!<S_!ML44!-c*o!>&{3a:oEVJGQR!1773<!+<=a3q6=RR BNS
 BNS
 ~!#.1QAaD12.1QAaD12.1QAaD12.1QAaD12 *4K!O4K!O!
( *?)D)D)F
 &9 #
|3d8nDGWW &&y1
 
 	5 GC!J ]$ > >R 2111
sC   J<K
K"K4KK
!K
=K
K 
AK%K)r3   N)r3   r   r   )&collectionsr   r>   clodius.tiles.formatr   formatr   clodius.tiles.utilsutilsrt   rc   	itertoolsr   numpyr   pandasrB   logging	getLogger__name__rg   r   r   rk   r   r&   rW   rr   rx   r   r   r   r   r   r   r   r   r   r   r'   r   <module>r     s      # # " "     			8	$ 
 ,1H#E x  (,t< t (,t< t -6$K y !	:,70 `SF3l5  Rj<,8@F$0.jr'   