
    tf#                     X    d dl Zd dlZd dlZd dlZd dlmZ	  G d d      Z
 G d d      Zy)    Nc                   l    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zy)ParallelDatac                     || _         y Ndata)selfr   s     W/var/www/html/software/conda/envs/higlass/lib/python3.12/site-packages/clodius/fpark.py__init__zParallelData.__init__	   s	    	    c                 ^    t        | j                  D cg c]
  } ||       c}      S c c}w r   )r   r   r	   funcds      r
   mapzParallelData.map   s#    dii8T!W8998s   *c                 ,    t        | j                        S r   )lenr   r	   s    r
   countzParallelData.count   s    499~r   c                 B    | j                   |j                   z   | _         | S r   r   )r	   
other_datas     r
   unionzParallelData.union   s    II
/	r   c                      y)N    r   s    r
   getNumPartitionszParallelData.getNumPartitions   s    r   c                 z    t        j                  t              }| j                  D ]  }||d   xx   dz  cc<    |S )zE
        Count the number of elements with a particular key.
        r   r   )coldefaultdictintr   )r	   countsr   s      r
   
countByKeyzParallelData.countByKey   s>     % 	A1Q4LAL	 r   c                     | S r   r   )r	   nums     r
   coalescezParallelData.coalesce$   s    r   c                     t        j                  t              }| j                  D ]  }||d      j	                  |d           t        |j                               S )zh
        When called on a dataset of (K, V) pairs, returns a dataset of (K, Iterable<V>) pairs.
        r   r   )r   r   listr   appendr   items)r	   bucketsr   s      r
   
groupByKeyzParallelData.groupByKey'   sR     //$' 	'AAaDM  1&	' GMMO,,r   c                     t        || j                        }t        t        t        j
                  j                  |                  S )zd
        Flatten a list of results mapped to the data
        and return as one large list.
        )r   r   r   r'   itchainfrom_iterable)r	   r   results      r
   flatMapzParallelData.flatMap2   s3    
 T499%D!7!7!?@AAr   c                 6    | j                   D ]
  } ||        y r   r   r   s      r
   foreachzParallelData.foreach;   s     	AG	r   c                 (     || j                          y r   r   r	   r   s     r
   foreachPartitionzParallelData.foreachPartition?   s    TYYr   c                    t        j                  t              }| j                  D ]  }||d      j	                  |d           t               }|D ]  }t        j                  |||         ||<     t        |j                               S )aV  
        When called on a dataset of (K, V) pairs, returns a dataset of (K, V)
        pairs where the values for each key are aggregated using the given
        reduce function func, which must be of type (V,V) => V. Like in
        groupByKey, the number of reduce tasks is configurable through an
        optional second argument.
        r   r   )
r   r   r'   r   r(   dictftreducer   r)   )r	   r   r*   r   reduced_bucketskeys         r
   reduceByKeyzParallelData.reduceByKeyB   s     //$' 	'AAaDM  1&	' & 	AC#%99T73<#@OC 	A O11344r   c                 .   t        j                  t              }| j                  D ]  }||d      j	                  |d           t               }|D ]*  }|j                         }||   D ]  }	 |||	      } |||<   , t        |j                               S )Nr   r   )	r   r   r'   r   r(   r8   copyr   r)   )
r	   	start_valseq_func	comb_funcr*   r   r;   r<   comb_valvals
             r
   aggregateByKeyzParallelData.aggregateByKeyT   s    //$' 	'AAaDM  1&	' & 	,C ~~'Hs| 3#Hc23#+OC 		, O11344r   c                 B    t        j                  || j                        S r   )r9   r:   r   r5   s     r
   r:   zParallelData.reduceb   s    yytyy))r   c                     | j                   S r   r   r   s    r
   collectzParallelData.collecte   s    yyr   c                      | j                   d | S r   r   )r	   ns     r
   takezParallelData.takeh   s    yy!}r   N)__name__
__module____qualname__r   r   r   r   r   r"   r%   r+   r1   r3   r6   r=   rE   r:   rH   rK   r   r   r
   r   r      sS    :		-B5$5*r   r   c                   F    e Zd ZdZd Zed        Zed        Zed        Zy)FakeSparkContextz6
    Emulate a SparkContext for local processing.
    c                      y r   r   r   s    r
   r   zFakeSparkContext.__init__q   s    r   c                     t        |       S r   )r   r   s    r
   parallelizezFakeSparkContext.parallelizet   s    D!!r   c           
          t        | d      5 }t        t        t        d |j	                                           cddd       S # 1 sw Y   yxY w)z
        Load a single file as a ParallelData set.

        :param filename: A file to be loaded line by line
        @return: A ParallelData object wrapping the lines in the file.
        rc                 "    | j                         S r   )strip)xs    r
   <lambda>z1FakeSparkContext.singleTextFile.<locals>.<lambda>   s    1779 r   N)openr   r'   r   	readlines)filenamefs     r
   singleTextFilezFakeSparkContext.singleTextFilex   sD     (C  	OAS)<akkm%L MN	O 	O 	Os   -AAc                 ,   t        j                  |       rkt        j                  t        j                  | d            }t        j                  g       }|D ]&  } |j                  t        j                  |             }( |S t        j                  |       S )a{  
        Load a filename as a text file. Filename can be either a single file
        or a directory containing a multitude of 'part-*' files.

        :param filename: The name of the file (or directory) containing the data which
                         we want to load one by one
        :return: A ParallelData object containing all of the lines of the file or files
        zpart-*)opisdirglobjoinrP   rS   r   r^   )r\   parts_filesps      r
   textFilezFakeSparkContext.textFile   sz     88H))BGGHh$?@K ,,R0A' GGG,;;HEFGH#228<<r   N)	rL   rM   rN   __doc__r   staticmethodrS   r^   rf   r   r   r
   rP   rP   l   sJ     " " O O = =r   rP   )collectionsr   	functoolsr9   rb   	itertoolsr-   os.pathpathr`   r   rP   r   r   r
   <module>rn      s+        a aH)= )=r   