o
    Nrfd4                     @  s  d dl mZ d dlZd dlmZ d dlmZ d dlZd dlZ	d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZmZ d dlmZ d dlmZ G dd dZG dd deZ G dd deZ!dd Z"dd Z#dd Z$dd Z%dS )    )annotationsN)defaultdict)datetime)is_bool_dtype)Array)tokenize)methods)IndexingError)Seriesnew_dd_object)is_index_likeis_series_likemeta_nonempty)HighLevelGraph)is_arraylikec                   @  s<   e Zd Zdd Zedd Zedd Zdd Zd	d
 ZdS )_IndexerBasec                 C  s
   || _ d S N)obj)selfr    r   `/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/dask/dataframe/indexing.py__init__   s   
z_IndexerBase.__init__c                 C  s   | j jS r   )r   _namer   r   r   r   r      s   z_IndexerBase._namec                 C  s   t r   )NotImplementedErrorr   r   r   r   _meta_indexer   s   z_IndexerBase._meta_indexerc                 C  s    |du r| j S | jdd|f S )z
        get metadata
        N)r   r   )r   iindexercindexerr   r   r   
_make_meta!   s   z_IndexerBase._make_metac                 C  s   t | jt| jfS r   )type__name__r   r   r   r   r   r   __dask_tokenize__*   s   z_IndexerBase.__dask_tokenize__N)	r    
__module____qualname__r   propertyr   r   r   r!   r   r   r   r   r      s    

	r   c                   @  s(   e Zd Zedd Zdd Zdd ZdS )_iLocIndexerc                 C  
   | j jjS r   )r   _metailocr   r   r   r   r   /      
z_iLocIndexer._meta_indexerc                 C  st   d}t |tst|t|dkrtd|\}}|td kr#t|| jjjs.| 	||S | jj| }| j
|S )Nzd'DataFrame.iloc' only supports selecting columns. It must be used like 'df.iloc[:, column_indexer]'.   Too many indexers)
isinstancetupler   len
ValueErrorslicer   columnsZ	is_unique_iloc__getitem__)r   keymsgr   r   Z	col_namesr   r   r   r3   3   s   

z_iLocIndexer.__getitem__c                 C  s0   |t d ksJ | ||}| jjtj||dS )Nmeta)r0   r   r   map_partitionsr   r(   r   r   r   r7   r   r   r   r2   L   s   z_iLocIndexer._ilocN)r    r"   r#   r$   r   r3   r2   r   r   r   r   r%   .   s
    
r%   c                   @  sl   e Zd ZdZedd Zdd Zdd Zdd	 Zd
d Z	dd Z
dd Zdd Zdd Zdd Zdd ZdS )_LocIndexerz"Helper class for the .loc accessorc                 C  r&   r   )r   r'   locr   r   r   r   r   V   r)   z_LocIndexer._meta_indexerc                 C  sL   t |trt|| jjkrd}t||d }|d }n|}d }| ||S )Nr+   r      )r,   r-   r.   r   ndimr	   _loc)r   r4   r5   r   r   r   r   r   r3   Z   s   

z_LocIndexer.__getitem__c                 C  s  t |tr| ||S t |tr| ||S t|r#| || j|S | jjr\| 	|}t |t
r7| ||S t|rGt|jsG| |j|S t |tsPt|rV| ||S | ||S t |ttjfsmt|rst|jssd}t|t |t
s}t
||}| ||}| jjtj|||dS )z%Helper function for the .loc accessorz^Cannot index with list against unknown division. Try setting divisions using ``ddf.set_index``r6   )r,   r
   _loc_seriesr   
_loc_arraycallabler>   r   known_divisions_maybe_partial_time_stringr0   
_loc_slicer   r   dtype	_loc_listvalueslistr   _loc_elementnpZndarrayKeyErrorr   r8   r   Ztry_loc)r   r   r   r5   r7   r   r   r   r>   j   s:   






z_LocIndexer._locc                 C  s   t | jjj}t||}|S )z{
        Convert index-indexer for partial time string slicing
        if obj.index is DatetimeIndex / PeriodIndex
        )r   r   r'   indexrC   )r   r   idxr   r   r   rC      s   
z&_LocIndexer._maybe_partial_time_stringc                 C  s6   t |js	td| ||}| jjtj||d|dS )NzuCannot index with non-boolean dask Series. Try passing computed values instead (e.g. ``ddf.loc[iindexer.compute()]``)z
loc-series)tokenr7   )r   rE   rK   r   r   r8   r   r;   r9   r   r   r   r?      s   
z_LocIndexer._loc_seriesc                 C  s   | d| jj}| ||S )N_)Zto_dask_dataframer   rL   r?   )r   r   r   Ziindexer_seriesr   r   r   r@      s   z_LocIndexer._loc_arrayc                 C  s   dt || j }| |}| ||}t|r[i }g }t| }t|D ]\}	\}
}tj	| j
|
f||f|||	f< |t|d  q%|t|d d d  tj||| jgd}nd d g}|df|di}t||}t||||dS )Nloc-%sr   r<   dependenciesr7   	divisions)r   r   _get_partitionsr   r.   sorteditems	enumerater   r;   r   appendr   from_collectionsheadr   )r   r   r   namepartsr7   dskrU   rX   idivindexergraphr   r   r   rF      s    
z_LocIndexer._loc_listc                 C  s   dt || j }| |}|| jjd k s|| jjd kr%tdt| |dftj| j|ft	|||fi}| 
||}tj||| jgd}t|||||gdS )NrP   r   rQ   z"the label [%s] is not in the indexrR   rT   )r   r   rV   rU   rK   strr   r;   r   r0   r   r   r[   r   )r   r   r   r]   partr_   r7   rc   r   r   r   rI      s   
 	z_LocIndexer._loc_elementc                 C  s.   t |ts	t|rt| jj|S t| jj|S r   )r,   rH   r   _partitions_of_index_valuesr   rU   _partition_of_index_value)r   keysr   r   r   rV      s   z_LocIndexer._get_partitionsc                 C  s   t | jj|S r   )_coerce_loc_indexr   rU   )r   r4   r   r   r   ri      s   z_LocIndexer._coerce_loc_indexc                 C  s  dt |||  }t|tsJ |jdv sJ |jd ur"| |j}nd}|jd ur0| |j}n| jjd }|jd u rU| jj	rU|jd u rJ| jj
d n	t| jj
d |j}n| |j}|jd u rz| jj	rz|jd u ro| jj
d n	t| jj
d |j}n| |j}||kr|dftj| j|ft|j|j|fi}||g}	n|dftj| j|ft|jd |fi}td|| D ]%}
|d u r| j||
 f|||
f< qtj| j||
 ftd d |f|||
f< qtj| j|ftd |j|f|||| f< |jd u r| jj
d }n	t|| jj
| }|jd u r| jj
d }nt|| jj
|d  }|f| jj
|d |d   |f }	t|	t|d ks;J | ||}tj||| jgd}t||||	dS )NrP   )Nr<   r   r<   rQ   rR   rT   )r   r,   r0   stepstartrV   stopr   ZnpartitionsrB   rU   minri   maxr   r;   r   ranger.   r   r   r[   r   )r   r   r   r]   rk   rl   istartistopr_   rU   r`   Z	div_startZdiv_stopr7   rc   r   r   r   rD      s|   







"z_LocIndexer._loc_sliceN)r    r"   r#   __doc__r$   r   r3   r>   rC   r?   r@   rF   rI   rV   ri   rD   r   r   r   r   r:   S   s    
(	r:   c                 C  sJ   | d du rd}t |t| |}t| |}tt| d td|d S )a'  In which partition does this value lie?

    >>> _partition_of_index_value([0, 5, 10], 3)
    0
    >>> _partition_of_index_value([0, 5, 10], 8)
    1
    >>> _partition_of_index_value([0, 5, 10], 100)
    1
    >>> _partition_of_index_value([0, 5, 10], 5)  # left-inclusive divisions
    1
    r   N4Can not use loc on DataFrame without known divisionsr*   r<   )r/   ri   bisectbisect_rightrm   r.   rn   )rU   valr5   r`   r   r   r   rg   8  s   
rg   c                 C  sd   | d du rd}t |tt}|D ]}t| |}tt| d td|d }|| | q|S )aS  Return defaultdict of division and values pairs
    Each key corresponds to the division which values are index values belong
    to the division.

    >>> sorted(_partitions_of_index_values([0, 5, 10], [3]).items())
    [(0, [3])]
    >>> sorted(_partitions_of_index_values([0, 5, 10], [3, 8, 5]).items())
    [(0, [3]), (1, [8, 5])]
    r   Nrs   r*   r<   )	r/   r   rH   rt   ru   rm   r.   rn   rZ   )rU   rG   r5   resultsrv   r`   ra   r   r   r   rf   L  s   
rf   c                 C  sJ   | rt | d trt|S | r#t | d tjr#t|| d jS |S )zxTransform values to be comparable against divisions

    This is particularly valuable to use with pandas datetimes
    r   )r,   r   pd	TimestamprJ   Z
datetime64ZastyperE   )rU   or   r   r   ri   b  s
   
ri   c                 C  s   t | sJ t| tjtjfs|S t|tr=t|jtr$| |jd}n|j}t|j	tr5| |j	d}n|j	}t||S t|trY| |d}| |d}tt
||t||S |S )z`
    Convert indexer for partial string selection
    if data has DatetimeIndex/PeriodIndex
    leftright)r   r,   rx   ZDatetimeIndexZPeriodIndexr0   rk   rd   Z_maybe_cast_slice_boundrl   rm   rn   )rL   rb   rk   rl   r   r   r   rC   n  s    


rC   )&
__future__r   rt   collectionsr   r   numpyrJ   Zpandasrx   Zpandas.api.typesr   Zdask.array.corer   Z	dask.baser   Zdask.dataframer   Zdask.dataframe._compatr	   Zdask.dataframe.corer
   r   Zdask.dataframe.utilsr   r   r   Zdask.highlevelgraphr   Z
dask.utilsr   r   r%   r:   rg   rf   ri   rC   r   r   r   r   <module>   s.    % f