o
    Nrf                     @   s   d dl mZ d dlmZ d dlZd dlZd dlZzd dl	m
Z
 W n ey,   eddw d dlmZ d dlmZ d dlmZ ddlmZmZ dd	lmZmZ d
d ZdddZdddZdd ZdddZdd Z	dddZ dS )    )product)ceilN)COOz,The 'sparse' package is required to use dask)tokenize   )	CSRReader
query_rect)parse_cooler_uri	partitionc           	         s   t | dZ}||  |d u rt  }t |d  }i }|D ]}t j | jd}|d ur9t||jd||< q!t	j
 fdd|D |d}|D ]}t	jg || dd	||< qJW d    n1 scw   Y  ||||fS )
Nrr   )enum)keyc                    s"   i | ]}|t jg  | jd qS ))dtype)nparrayr   .0r   grp \/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/cooler/sandbox/dask.py
<dictcomp>&      " z#_get_group_info.<locals>.<dictcomp>)columnsT)
categoriesordered)h5pyFilelistkeyslenZcheck_dtyper   sorted__getitem__pd	DataFrameCategorical)	path	grouppathr   fnrowscategoricalsr   dtmetar   r   r   _get_group_info   s&   r-   c                 C   s   z=|d ur	|   t| d}|| | | W  d    W |d ur(|  S S 1 s,w   Y  W |d ur<|  d S d S |d urG|  w w )Nr   acquirer   r   release)filepathr'   r   slclockr(   r   r   r   _slice_dataset/   s   

r4   c                    s   z@|d ur	|   t| d  fdd|D W  d    W |d ur+|  S S 1 s/w   Y  W |d ur?|  d S d S |d urJ|  w w )Nr   c                    s   i | ]}|  |  qS r   r   r   r(   r'   r2   r   r   r   ?   s    z _slice_group.<locals>.<dictcomp>r.   )r1   r'   r   r2   r3   r   r5   r   _slice_group:   s   

r6   c                 C   s0   |  D ]\}}tjj| | |dd| |< q| S )NT)r   )itemsr#   r%   Z
from_codes)dataZcategorical_columnsr   Zcategory_dictr   r   r   _restore_categoriesE   s   r9   逖 c                 C   s  t | \}}t|||\}}}}	t||||}
d|
 }dttd||dd  }|d |d kr:g ||d R }i }tdtt|| D ](}t|| |d | }t|||||f}|	rct	||	f}t
j|d|jf|||f< qGt||||}|dur|j|ddd	}|S )
a  
    Create a dask dataframe around a column-oriented table in HDF5.

    A table is a group containing equal-length 1D datasets.

    Parameters
    ----------
    group_uri : str
        URI to the HDF5 group storing the table.
    keys : list, optional
        list of HDF5 Dataset keys, default is to use all keys in the group
    chunksize : int, optional
        Chunk size
    index : str, optional
        Sorted column to use as index
    lock : multiprocessing.Lock, optional
        Lock to serialize HDF5 read/write access. Default is no lock.

    Returns
    -------
    :class:`dask.dataframe.DataFrame`

    Notes
    -----
    Learn more about the `dask <https://docs.dask.org/en/latest/>`_ project.

    zdaskify-h5py-table-)r      Nr   TF)r!   Zdrop)r	   r-   r   tuplerangeintr   slicer6   r9   r#   r$   r   ddZ	set_index)Z	group_urir   	chunksizeindexr3   r1   r'   r)   r,   r*   token	task_nameZ	divisionsdskir2   Z	data_dictdfr   r   r   
read_tableK   s,   
rI   c              	   C   s   | j }| d;}|d | j}	t||dd}
|r(t|
j||||dd\}}}n|
||||\}}}t|s<||	}W d    n1 sFw   Y  t|| || f||| || fd}|sd|	 }|S )Nr   Zpixelsi e)Z	max_chunkT)Zduplex)shape)
Z_is_symm_upperopenr   r   r   queryr    Zastyper   Ztodense)clri0i1j0j1fieldsparse_arrayZis_upperZh5r   readerrG   jvZarrr   r   r   _array_select   s   
&	rW   countF   c                    s   t  j||||}d| }	||  f}
t dddd}fddtd|
d |D }tdd |D |
d ff}tt|	ggdd |D R  } fd	d|D }tt||}t	j
||	|||
d
S )aj  
    Create a parallel Dask array around the matrix representation of a cooler.

    Parameters
    ----------
    clr : :class:`cooler.Cooler`
        Cooler object
    i0, i1 : int
        Row query range
    j0, j1 : int
        Column query range
    field : str
        Value column to query
    sparse_array : bool, optional
        Create a dask array backed by :class:`sparse.COO` sparse arrays
        instead of dense numpy arrays (default).
    chunksize : int, optional
        Length of the rowwise chunks to partition the underlying data into.

    Returns
    -------
    :class:`dask.array.Array`

    zcooler-array-slice-r   c                    s   g | ]
\}}|| fqS r   r   )r   lohi)rP   rQ   r   r   
<listcomp>   s    z#load_dask_array.<locals>.<listcomp>c                 s   s     | ]}|d  |d  V  qdS )r<   r   Nr   )r   sr   r   r   	<genexpr>   s    z"load_dask_array.<locals>.<genexpr>r<   c                 S   s   g | ]}t t|qS r   )r>   r    )r   dimr   r   r   r\      s    c                    s"   g | ]}t  g|R qS r   )rW   )r   r2   )rM   rR   rS   r   r   r\      r   )r,   rJ   )r   urirW   r
   r=   r   r   dictzipdaZArray)rM   rN   rO   rP   rQ   rR   rS   rB   rD   rE   rJ   r,   Zsliceschunksr   valuesrF   r   )rM   rR   rP   rQ   rS   r   load_dask_array   s     rf   )N)Nr:   NN)rX   FrY   )!	itertoolsr   mathr   r   numpyr   Zpandasr#   sparser   ImportErrorZ
dask.arrayr   rc   Zdask.dataframeZ	dataframerA   Z	dask.baser   corer   r   utilr	   r
   r-   r4   r6   r9   rI   rW   rf   r   r   r   r   <module>   s.    



;