o
    NrfX                     @  s   d dl mZ d dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ dddZdd	d
Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )    )annotationsN)partial)islice)Bagc                 C     t | ||d}|t|S )a  Chooses k unique random elements from a bag.

    Returns a new bag containing elements from the population while
    leaving the original population unchanged.

    Parameters
    ----------
    population: Bag
        Elements to sample.
    k: integer, optional
        Number of elements to sample.
    split_every: int (optional)
        Group partitions into groups of this size while performing reduction.
        Defaults to 8.

    Examples
    --------
    >>> import dask.bag as db
    >>> from dask.bag import random
    >>> b = db.from_sequence(range(5), npartitions=2)
    >>> list(random.sample(b, 3).compute())  # doctest: +SKIP
    [1, 3, 5]
    
populationksplit_every)_samplemap_partitions_finalize_sampler   r	   r
   res r   X/var/www/html/software/conda/envs/catlas/lib/python3.10/site-packages/dask/bag/random.pysample   s   r      c                 C  r   )a7  
    Return a k sized list of elements chosen with replacement.

    Parameters
    ----------
    population: Bag
        Elements to sample.
    k: integer, optional
        Number of elements to sample.
    split_every: int (optional)
        Group partitions into groups of this size while performing reduction.
        Defaults to 8.

    Examples
    --------
    >>> import dask.bag as db
    >>> from dask.bag import random
    >>> b = db.from_sequence(range(5), npartitions=2)
    >>> list(random.choices(b, 3).compute())  # doctest: +SKIP
    [1, 1, 5]
    r   )_sample_with_replacementr   r   r   r   r   r   choices(   s   r   c                 C  s   g }g }d}| D ]}|\}}| | ||7 }t|}	|||	f q||kr-|s-||fS g }
|D ]\}}	|	dkrF||	|  }|
|g|	 7 }
q1|rLtjnt}|||
|d|fS )aq  
    Reduce function used on the sample and choice functions.

    Parameters
    ----------
    reduce_iter : iterable
        Each element is a tuple coming generated by the _sample_map_partitions function.
    replace: bool
        If True, sample with replacement. If False, sample without replacement.

    Returns a sequence of uniformly distributed samples;
    r   )r   weightsr	   )extendlenappendrndr   &_weighted_sampling_without_replacement)reduce_iterr	   replaceZns_kssniZs_iZn_iZk_ipZp_iZsample_funcr   r   r   _sample_reduceB   s&   
r"   c                   s4   fddt tD } fddt||D S )zk
    Source:
        Weighted random sampling with a reservoir, Pavlos S. Efraimidis, Paul G. Spirakis
    c                   s&   g | ]}t t  |  |fqS r   )mathlogr   random).0r    )r   r   r   
<listcomp>m   s   & z:_weighted_sampling_without_replacement.<locals>.<listcomp>c                   s   g | ]} |d   qS )r   r   )r&   x)r   r   r   r'   n   s    )ranger   heapqnlargest)r   r   r	   eltr   )r   r   r   r   h   s   r   c                 C  s4   |dk rt d| jtt|dtt|ddt|dS )Nr   z(Cannot take a negative number of samplesr	   Fr	   r   Zout_typer
   )
ValueError	reductionr   _sample_map_partitionsr"   r   r   r   r   r   r   q   s   
r   c                 C  s    | d }t ||k rtd|S )Nr   zSample larger than population)r   r0   )r   r	   r   r   r   r   r   |   s   r   c           	      C  s   g d}}t | }t||D ]}|| |d7 }qttt | }|d t| }t	||D ]'\}}||krV||t
|< |ttt | 9 }|t|7 }|d7 }q3||fS )z
    Reservoir sampling strategy based on the L algorithm
    See https://en.wikipedia.org/wiki/Reservoir_sampling#An_optimal_algorithm
    r   r   )iterr   r   r#   expr$   r   r%   
_geometric	enumerate	randrange)	r   r	   	reservoirstream_lengthstreamewnxtr    r   r   r   r2      s   



r2   c                 C  s$   | j tt|dtt|ddt|dS )Nr-   Tr.   r/   )r1   r   '_sample_with_replacement_map_partitionsr"   r   r   r   r   r   r      s   
r   c                   s   t | }t|  fddt|D d}}dd t|D }dd |D }t|}t|dD ]7\} ||krbt|D ]"\}	}
|
|kr] ||	< ||	  t 9  < ||	  t||	 7  < q;t|}|d7 }q/||fS )z
    Reservoir sampling with replacement, the main idea is to use k reservoirs of size 1
    See Section Applications in http://utopia.duth.gr/~pefraimi/research/data/2007EncOfAlg.pdf
    c                   s   g | ]} qS r   r   r&   _r;   r   r   r'      s    z;_sample_with_replacement_map_partitions.<locals>.<listcomp>r   c                 S  s   g | ]}t  qS r   )r   r%   r?   r   r   r   r'          c                 S  s   g | ]}t |qS r   )r5   )r&   Zwir   r   r   r'      rB   )r3   nextr)   minr6   r   r%   r5   )r   r	   r:   r8   r9   r<   r=   Zmin_nxtr    jr   r   rA   r   r>      s"   
r>   c                 C  s(   t ttddtd|   d S )Nr   r   )intr#   r$   r   uniform)r!   r   r   r   r5      s   (r5   )N)r   N)
__future__r   r*   r#   r%   r   	functoolsr   	itertoolsr   Zdask.bag.corer   r   r   r"   r   r   r   r2   r   r>   r5   r   r   r   r   <module>   s"    

&		