
    >ieX                        d dl mZ d dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ ddZddZd	 Zd
 Zd Zd Zd Zd Zd Zd ZdS )    )annotationsN)partial)islice)Bagc                \    t          | ||          }|                    t          |          S )a  Chooses k unique random elements from a bag.

    Returns a new bag containing elements from the population while
    leaving the original population unchanged.

    Parameters
    ----------
    population: Bag
        Elements to sample.
    k: integer, optional
        Number of elements to sample.
    split_every: int (optional)
        Group partitions into groups of this size while performing reduction.
        Defaults to 8.

    Examples
    --------
    >>> import dask.bag as db
    >>> from dask.bag import random
    >>> b = db.from_sequence(range(5), npartitions=2)
    >>> list(random.sample(b, 3).compute())  # doctest: +SKIP
    [1, 3, 5]
    
populationksplit_every)_samplemap_partitions_finalize_sampler	   r
   r   ress       /lib/python3.11/site-packages/dask/bag/random.pysampler      s/    0 Z1+
F
F
FC.222       c                \    t          | ||          }|                    t          |          S )a7  
    Return a k sized list of elements chosen with replacement.

    Parameters
    ----------
    population: Bag
        Elements to sample.
    k: integer, optional
        Number of elements to sample.
    split_every: int (optional)
        Group partitions into groups of this size while performing reduction.
        Defaults to 8.

    Examples
    --------
    >>> import dask.bag as db
    >>> from dask.bag import random
    >>> b = db.from_sequence(range(5), npartitions=2)
    >>> list(random.choices(b, 3).compute())  # doctest: +SKIP
    [1, 1, 5]
    r   )_sample_with_replacementr   r   r   s       r   choicesr   (   s/    , #jA;
W
W
WC.222r   c                F   g }g }d}| D ]G}|\  }}|                     |           ||z  }t          |          }	|                    ||	f           H||k    r|s||fS g }
|D ]\  }}	|	dk    r||	|z  z  }|
|g|	z  z  }
|rt          j        nt
          } |||
|          |fS )aq  
    Reduce function used on the sample and choice functions.

    Parameters
    ----------
    reduce_iter : iterable
        Each element is a tuple coming generated by the _sample_map_partitions function.
    replace: bool
        If True, sample with replacement. If False, sample without replacement.

    Returns a sequence of uniformly distributed samples;
    r   )r	   weightsr
   )extendlenappendrndr   &_weighted_sampling_without_replacement)reduce_iterr
   replacens_kssnis_in_ik_ipp_isample_funcs                r   _sample_reducer+   B   s     E
A	A ! !
c		S#hhc3Z    1uuWu!t 	A  S77q/C#A!(T#++.TK;!Q!444a77r   c                     fdt          t                              D             } fdt          j        ||          D             S )zk
    Source:
        Weighted random sampling with a reservoir, Pavlos S. Efraimidis, Paul G. Spirakis
    c                p    g | ]2}t          j        t          j                              |         z  |f3S  )mathlogr   random).0r$   r   s     r   
<listcomp>z:_weighted_sampling_without_replacement.<locals>.<listcomp>m   s7    
Q
Q
QDHSZ\\""WQZ/3
Q
Q
Qr   c                ,    g | ]}|d                   S )r   r.   )r2   xr	   s     r   r3   z:_weighted_sampling_without_replacement.<locals>.<listcomp>n   s"    ===Jqt===r   )ranger   heapqnlargest)r	   r   r
   elts   ``  r   r   r   h   sT    
 R
Q
Q
QU3w<<=P=P
Q
Q
QC====enQ&<&<====r   c                    |dk     rt          d          |                     t          t          |          t          t          |d          t
          |          S )Nr   z(Cannot take a negative number of samplesr
   Fr
   r    out_typer   )
ValueError	reductionr   _sample_map_partitionsr+   r   r   s      r   r   r   q   s`    1uuCDDD&!,,,!U333	     r   c                Z    | d         }t          |          |k     rt          d          |S )Nr   zSample larger than population)r   r?   )r   r
   r   s      r   r   r   |   s.    ^F
6{{Q8999Mr   c                6   g d}}t          |           }t          ||          D ]}|                    |           |dz  }t          j        t          j        t          j                              |z            }|dz
  t          |          z   }t          ||          D ]v\  }}||k    rf||t          j
        |          <   |t          j        t          j        t          j                              |z            z  }|t          |          z  }|dz  }w||fS )z
    Reservoir sampling strategy based on the L algorithm
    See https://en.wikipedia.org/wiki/Reservoir_sampling#An_optimal_algorithm
    r   r   )iterr   r   r/   expr0   r   r1   
_geometric	enumerate	randrange)	r	   r
   	reservoirstream_lengthstreamewnxtr$   s	            r   rA   rA      s     "1}I*FFA  #*,,''!+,,Aq5JqMM
!C&!$$  188*+IcmA&&'$(3:<<0014555A:a== Cm##r   c                    |                      t          t          |          t          t          |d          t          |          S )Nr;   Tr<   r=   )r@   r   '_sample_with_replacement_map_partitionsr+   r   r   s      r   r   r      sH    71===!T222	     r   c           	        t          |           }t          |          fdt          |          D             d}}d t          |          D             }d |D             }t          |          }t	          |d          D ]\  }||k    rst	          |          D ]T\  }	}
|
|k    rI||	<   ||	xx         t          j                    z  cc<   ||	xx         t          ||	                   z  cc<   Ut          |          }|dz  }||fS )z
    Reservoir sampling with replacement, the main idea is to use k reservoirs of size 1
    See Section Applications in http://utopia.duth.gr/~pefraimi/research/data/2007EncOfAlg.pdf
    c                    g | ]}S r.   r.   )r2   _rL   s     r   r3   z;_sample_with_replacement_map_partitions.<locals>.<listcomp>   s    444a444r   r   c                4    g | ]}t          j                    S r.   )r   r1   )r2   rS   s     r   r3   z;_sample_with_replacement_map_partitions.<locals>.<listcomp>   s    (((!(((r   c                ,    g | ]}t          |          S r.   )rF   )r2   wis     r   r3   z;_sample_with_replacement_map_partitions.<locals>.<listcomp>   s    
&
&
&b:b>>
&
&
&r   )rD   nextr6   minrG   r   r1   rF   )r	   r
   rK   rI   rJ   rM   rN   min_nxtr$   jr#   rL   s              @r   rP   rP      s)    *FVA4444588444a}I((uQxx(((A
&
&A
&
&
&C#hhG&!$$ 	 	1<<!# / /1<<#$IaLaDDDCJLL(DDDFFFj1...FFF#hhGm##r   c                    t          t          j        t          j        dd                    t          j        d| z
            z            dz   S )Nr   r   )intr/   r0   r   uniform)r(   s    r   rF   rF      s<    txAq))**TXa!e__<==AAr   )N)r   N)
__future__r   r7   r/   r1   r   	functoolsr   	itertoolsr   dask.bag.corer   r   r   r+   r   r   r   rA   r   rP   rF   r.   r   r   <module>rb      s   " " " " " "                        3 3 3 383 3 3 34#8 #8 #8L> > >    $ $ $2  $ $ $8B B B B Br   