§
    ÖDUfd<  ã                   ó$  — d dl Z d dlZd dlmZ d dlmZmZ d dlZd dlZ	ddl
mZmZmZmZmZmZmZmZ 	 d dlZdZn# e$ r dZY nw xY were	 	 	 d dej        d	ed
ede	j        f         dedej        f
d„¦   «         Ze	 	 	 d de	j        d	ed
ede	j        f         dede	j        f
d„¦   «         Ze	 	 	 d deej        e	j        f         d	ed
ede	j        f         dedeej        e	j        f         f
d„¦   «         Ze                     ej        ¦  «        	 	 	 d dej        d	ed
ede	j        f         dedej        f
d„¦   «         Z	 	 	 d!dedededededdfd„Zn1e	 	 	 d de	j        d	ed
ede	j        f         dede	j        f
d„¦   «         Ze                     e	j        ¦  «        	 	 	 d de	j        d	ed
ede	j        f         dede	j        f
d„¦   «         Z ej         ddd¬¦  «        de	j        de	j        de	j        d
e	j        de	j        f
d„¦   «         Z!dS )"é    N)Úsingledispatch)ÚoverloadÚUnioné   )ÚTempFileHolderÚglue_csvÚglue_hdfÚglue_parquetÚ	parse_csvÚ	parse_hdfÚparse_parquetÚ_parallel_argsortTFÚdataÚaxisÚtargetÚncpusÚreturnc                 ó   — d S ©N© ©r   r   r   r   s       úU/var/www/html/software/conda/lib/python3.11/site-packages/qnorm/quantile_normalize.pyÚquantile_normalizer      s	   € ð
 /2¨có    c                 ó   — d S r   r   r   s       r   r   r   &   s	   € ð
 -0¨Cr   c                 ó@   — t          dt          | ¦  «        › ¦  «        ‚)aÔ  
        Quantile normalize your array/dataframe.

        It does quantile normalization in the "correct" way in the sense that
        it takes the mean of duplicate values instead of ignoring them.

        Args:
            data: numpy.ndarray or pandas.DataFrame to be normalized
            axis: axis along to normalize. Axis=1 (default) normalizes each
                  column/sample which gives them identical distributions.
                  Axis=0 normalizes each row/feature giving them all identical
                  distributions.
            target: distribution to normalize onto
            ncpus: number of cpus to use for normalization

        Returns: a quantile normalized copy of the input.
        ú,quantile_normalize not implemented for type ©ÚNotImplementedErrorÚtyper   s       r   r   r   .   ó'   € õ0 "ØG½4À¹:¼:ÐGÐGñ
ô 
ð 	
r   c                 ó  — |                       ¦   «         }|dk    r5t          |j                             t          ¦  «        |||¦  «        |d d …<   n4t          |j                             t          ¦  «        |||¦  «        |d d …<   |S )Nr   )ÚcopyÚquantile_normalize_npÚvaluesÚastypeÚfloat)r   r   r   r   Úqn_datas        r   Úquantile_normalize_pdr)   J   s‡   € ð —)’)‘+”+ˆð 1Š9ˆ9Ý.Ø”×%Ò%¥eÑ,Ô,¨d°F¸Eñô ˆGAAA‰JˆJõ /Ø”×%Ò%¥eÑ,Ô,¨d°F¸Eñô ˆGAAA‰Jð ˆr   é † é   ÚinfileÚoutfileÚrowchunksizeÚcolchunksizec                 ó  ‡!‡"‡#— |                       d¦  «        rd}t          | ¦  «        \  Š!}nf|                       d¦  «        rd}t          | ¦  «        \  Š!}}n;|                       d¦  «        rd}t          | ¦  «        \  Š!}}}	nt	          d¦  «        ‚t          ‰!¦  «        }
t          |¦  «        }g }g }g }t          j        |¦  «        }t          ¦   «         5 }t          t          j        |
|z  ¦  «        ¦  «        D ]Ž}||z  t          j        |dz   |z  d	|
¦  «        }}|dk    rµt          j        | ¦  «        5 Š"t          ‰"                     ¦   «         ¦  «        dk    sJ ‚‰"                     ¦   «         d	         Š#ˆ!ˆ"ˆ#fd
„t          ||¦  «        D ¦   «         }t          j        |d¬¦  «                             d¦  «        }ddd¦  «         n# 1 swxY w Y   n{|dk    rQt          j        | |dd	d	gt'          t          |dz   |dz   ¦  «        ¦  «        ¢¬¦  «                             d¦  «        }n$|dk    rt          j        | ‰!||…         ¬¦  «        }t+          |j        ||j        j        ¦  «        \  }}~t          j        ||d	¬¦  «        }t          j        |d¬¦  «        }|||z
  ||z
  |z  z  z  }|                     |                     dd¬¦  «        ¦  «         |                     |                     dd¬¦  «        ¦  «         |                     |                     dd¬¦  «        ¦  «         t          j        |d         |¦  «         t          j        |d         |¦  «         t          j        |d         |¦  «         ~~~Œg }g }t          j        |t          j        t          |¦  «        |z  ¦  «        ¦  «        D ][}|                     |                     dd¬¦  «        ¦  «         t          j        |¦  «                             |d         d¬¦  «         Œ\|                     |¦  «         ~t          t          j        |
|z  ¦  «        ¦  «        D ]}t          j         ||         d¬¦  «        }t          j         ||         d¬¦  «        }t          j         ||         d¬¦  «        }tC          ||||¦  «        }~~~g }tE          t          j        |t          j        |j#        d	         |z  ¦  «        ¦  «        ¦  «        D ]M\  }}|                     d|› d|› dd¬¦  «        } |                     | ¦  «         t          j        | |¦  «         ŒN~~|                     |¦  «         ŒtH          j%         &                    |¦  «        rtI          j'        |¦  «         |dk    rtQ          |‰!|¦  «         n2|dk    rtS          |‰!||¦  «         n|dk    rtU          |‰!|||	¦  «         ddd¦  «         dS # 1 swxY w Y   dS )a  
        Memory-efficient quantile normalization implementation by splitting
        the task into sequential subtasks, and writing the intermediate results
        to disk instead of keeping them in memory. This makes the memory
        footprint independent of the input table, however also slower..

        Args:
            infile: path to input table. The table can be either a csv-like file
                of which the delimiter is auto detected. Or the infile can be a
                hdf file, which requires to be stored with format=table.
            outfile: path to the output table. Has the same layout and delimiter
                as the input file. If the input is csv-like, the output is csv-
                like. If the input is hdf, then the output is hdf.
            rowchunksize: how many rows to read/write at the same time when
                combining intermediate results. More is faster, but also uses
                more memory.
            colchunksize: how many columns to use at the same time when
                calculating the mean and normalizing. More is faster, but also
                uses more memory.
            ncpus: The number of cpus to use. Scales diminishingly, and more
                than four is generally not useful.
        )z.hdfz.h5Úhdf)z.csvz.tsvz.txtÚcsvz.parquetÚparquetzhOnly HDF ('.hdf', '.h5'), text ('.csv', '.tsv', '.txt'), and parquet ('.parquet') formats are supported.r   r   c                 óH   •— g | ]}‰                      ‰‰|         ¦  «        ‘ŒS r   )Úselect_column)Ú.0ÚiÚcolumnsr1   Úkeys     €€€r   ú
<listcomp>z2incremental_quantile_normalize.<locals>.<listcomp>¤   s=   ø€ ð  ð  ð  à !ð  ×-Ò-¨c°7¸1´:Ñ>Ô>ð ð  ð  r   ©r   Úfloat32Nú#)ÚsepÚcommentÚ	index_colÚusecols)r8   Úqnorm_z.npy)ÚprefixÚsuffixéÿÿÿÿz.p)ÚcompressionT)Úallow_pickleÚ_)+Úendswithr   r   r   r   ÚlenÚnpÚzerosr   ÚrangeÚmathÚceilÚclipÚpdÚHDFStoreÚkeysÚconcatr&   Úread_csvÚlistÚread_parquetr   r%   ÚdtypeÚtake_along_axisÚmeanÚappendÚget_filenameÚsaveÚarray_splitÚ	DataFrameÚ	to_pickleÚloadÚ_numba_accel_qnormÚ	enumerateÚshapeÚosÚpathÚexistsÚremover	   r   r
   )$r,   r-   r.   r/   r   Ú
dataformatÚindexÚ	delimiterÚ
index_usedÚschemaÚnr_colsÚnr_rowsÚtmp_valsÚtmp_sorted_valsÚtmp_idxsr   Útfhr7   Ú	col_startÚcol_endÚcolsÚdfr   Ú
sorted_idxÚsorted_valsÚ	rankmeansÚ	qnorm_tmpÚindex_tmpfilesÚchunkÚqnormedÚcol_tmpfilesÚjÚtmpfiler8   r1   r9   s$                                    @@@r   Úincremental_quantile_normalizer‚   _   s|  øøø€ ð: ?Š?˜?Ñ+Ô+ð 	ØˆJÝ& vÑ.Ô.‰NˆGUUØ_Š_Ð5Ñ6Ô6ð 	ØˆJÝ(1°&Ñ(9Ô(9Ñ%ˆGU˜I˜IØ_Š_˜jÑ*Ô*ð 	Ø"ˆJÝ1>¸vÑ1FÔ1FÑ.ˆGU˜J¨¨å%ðBñô ð õ g‘,”,ˆÝe‘*”*ˆð ˆØˆØˆõ ”˜'Ñ"Ô"ˆåÑÔð u	N å4œ9 W¨|Ñ%;Ñ<Ô<Ñ=Ô=ð :2ñ :2à˜Ñ$Ý”G˜Q ™U lÑ2°A°wÑ?Ô?ð #	ð
  Ò&Ð&Ýœ VÑ,Ô,ð G°Ý" 3§8¢8¡:¤:™œ°!Ò3Ð3Ð3Ð3Ø!Ÿhšh™jœj¨œm˜ð ð  ð  ð  ð  ð  å%*¨9°gÑ%>Ô%>ð ñ  ô  ˜õ  œY t°!Ð4Ñ4Ô4×;Ò;¸IÑFÔF˜ðGð Gð Gñ Gô Gð Gð Gð Gð Gð Gð Gøøøð Gð Gð Gð Gøð   5Ò(Ð(ÝœØØ%Ø #Ø"#Ø!"Ð M¥T­%°	¸A±¸wÈ¹{Ñ*KÔ*KÑ%LÔ%LÐ Mðñ ô ÷ ’f˜YÑ'Ô'ð Bð   9Ò,Ð,ÝœØ¨°	¸'Ð0AÔ(Bðñ ô Bõ
 $5Ø”I˜u b¤i¤oñ$ô $Ñ jð Ý Ô0ØØØðñ ô õ
 œG K°aÐ8Ñ8Ô8	ð ˜9 vÑ-Ø˜yÑ(¨WÑ5ññ ð
 —’Ø×$Ò$¨H¸VÐ$ÑDÔDñô ð ð  ×&Ò&Ø×$Ò$¨H¸VÐ$ÑDÔDñô ð ð —’Ø×$Ò$¨H¸VÐ$ÑDÔDñô ð õ ”˜ œ dÑ+Ô+Ð+Ý”˜¨Ô+¨[Ñ9Ô9Ð9Ý”˜ œ jÑ1Ô1Ð1Ø˜* k¡kð ˆIð  ˆNÝœØ•t”y¥ U¡¤¨lÑ!:Ñ;Ô;ñô ð ð ð ×%Ò%Ø×$Ò$¨H¸TÐ$ÑBÔBñô ð õ ”˜UÑ#Ô#×-Ò-Ø" 2Ô&°Dð .ñ ô ð ð ð ×Ò˜^Ñ,Ô,Ð,Øõ 4œ9 W¨|Ñ%;Ñ<Ô<Ñ=Ô=ð /ñ /å”w˜x¨œ{¸Ð>Ñ>Ô>ÝœW X¨a¤[¸tÐDÑDÔD
Ý œg o°aÔ&8ÀtÐLÑLÔLõ -Ø˜* k°6ñô ð ˜* kð  "Ý )Ý”NØ¥¤¨7¬=¸Ô+;¸lÑ+JÑ!KÔ!Kñô ñ!ô !ð 	,ð 	,‘HAuð
 "×.Ò.Ø0¨Ð0Ð0¨AÐ0Ð0Ð0¸ð /ñ ô Gð !×'Ò'¨Ñ0Ô0Ð0Ý”G˜G UÑ+Ô+Ð+Ð+Ø˜UØ× Ò  Ñ.Ô.Ð.Ñ.åŒw~Š~˜gÑ&Ô&ð #Ý”	˜'Ñ"Ô"Ð"ð ˜UÒ"Ð"Ý˜ '¨9Ñ5Ô5Ð5Ð5Ø˜uÒ$Ð$Ý˜ '¨9°iÑ@Ô@Ð@Ð@Ø˜yÒ(Ð(Ý˜W g¨y¸*ÀfÑMÔMÐMðku	Nð u	Nð u	Nñ u	Nô u	Nð u	Nð u	Nð u	Nð u	Nð u	Nð u	Nð u	Nøøøð u	Nð u	Nð u	Nð u	Nð u	Nð u	Ns9   ÃA#XÄ=B	GÇXÇGÇXÇGÇPXØXØXc                 ó@   — t          dt          | ¦  «        › ¦  «        ‚)aÊ  
        Quantile normalize your array.

        It does quantile normalization in the "correct" way in the sense that
        it takes the mean of duplicate values instead of ignoring them.

        Args:
            data: numpy.ndarray or pandas.DataFrame to be normalized
            axis: axis along to normalize. Axis=1 (default) normalizes each
                  column/sample which gives them identical distributions.
                  Axis=0 normalizes each row/feature giving them all identical
                  distributions.
            target: distribution to normalize onto
            ncpus: number of cpus to use for normalization

        Returns: a quantile normalized copy of the input.
        r   r   r   s       r   r   r     r!   r   Ú_datac                 ó  ‡ — t          j        ‰ j        t           j        ¦  «        st	          d‰ j        › d¦  «        ‚t          ˆ fd„t           j        t           j        fD ¦   «         ¦  «        rt           j        }nt           j        }|dk    rt          j	        ‰ ¦  «        Š n|dk    rnt	          d|› d¦  «        ‚|dk    r-‰  
                    |¬¦  «        }t          j        |d¬	¦  «        }n*|dk    rt          ‰ ||¦  «        \  }}nt	          d
¦  «        ‚t          j        ||d¬	¦  «        }|€t          j        |d¬	¦  «        }nt          |t           j        ¦  «        s3	 t          j        |¦  «        }n# t$          $ r t	          d¦  «        ‚w xY w|j        dk    rt	          d|j        › d¦  «        ‚|j        d         |j        d         k    r,t	          d|j        d         › d|j        d         › d¦  «        ‚t          j        |j        t           j        ¦  «        st	          d|j        › d¦  «        ‚t          j        | 
                    |¬¦  «        ¦  «        }t-          ||||¦  «        }|dk    r|j        }|S )NzThe type of your data (z[) is is not supported, and might lead to undefined behaviour. Please use numeric data only.c              3   óL   •K  — | ]}t          j        ‰j        |¦  «        V — Œd S r   )rK   Ú
issubdtyperX   )r6   rX   r„   s     €r   ú	<genexpr>z(quantile_normalize_np.<locals>.<genexpr>>  sB   øè è € ð ð Ø.3Œe”k 5Ñ)Ô)ðð ð ð ð ð r   r   r   z`qnorm only supports 2 dimensional data, so the axishas to be either 0 or 1, but you set axis to ú.)rX   r;   z2The number of cpus needs to be a positive integer.z5The target could not be converted to a numpy.ndarray.zWThe target array should be a 1-dimensionsal vector, however you supplied a vector with z dimensionsz=The target array does not contain the same amount of values (z) as the data contains rows (ú)zThe type of your target ()rK   r‡   rX   ÚnumberÚ
ValueErrorÚanyÚint32r<   Úfloat64Ú	transposer&   Úargsortr   rY   rZ   Ú
isinstanceÚndarrayÚarrayÚ	ExceptionÚndimrd   Úsortrb   ÚT)	r„   r   r   r   rX   r   rx   Ú
sorted_valÚ	final_ress	   `        r   r$   r$   /  s  ø€ õ Œ=˜œ¥b¤iÑ0Ô0ð Ýð- e¤kð -ð -ð -ñ
ô 
ð 	
õ 
ð ð ð ð Ý8:¼Å"Ä*Ð7Mðñ ô ñ 
ô 
ð õ ”
ˆˆå”
ˆð ˆq‚y€yÝ”˜UÑ#Ô#ˆˆØ	ŠˆØåðàðð ð ñ
ô 
ð 	
ð ‚z€zà|Š| %ˆ|Ñ(Ô(ˆõ ”Z ¨1Ð-Ñ-Ô-ˆ
ˆ
Ø	Šˆå,¨U°E¸5ÑAÔAÑˆˆjˆjåÐMÑNÔNÐNåÔ# D¨*¸1Ð=Ñ=Ô=€Jà€~å”˜¨!Ð,Ñ,Ô,ˆ‰õ ˜&¥"¤*Ñ-Ô-ð 	ðÝœ &Ñ)Ô)øÝð ð ð Ý ØNñô ð ðøøøð Œ;˜!ÒÐÝðGØ.4¬kðGð Gð Gñô ð ð Œ<˜Œ?˜dœj¨œmÒ+Ð+Ýð%Ø”L ”Oð%ð %à”J˜q”Mð%ð %ð %ñô ð õ
 Œ}˜Vœ\­2¬9Ñ5Ô5ð 	Ýð1¨D¬Jð 1ð 1ð 1ñô ð õ
 ”˜Ÿš¨U˜Ñ3Ô3Ñ4Ô4ˆå" 4¨°ZÀÑHÔH€IØˆq‚y€yØ”Kˆ	ØÐs   Å&E; Å;F)ÚnopythonÚfastmathÚcacheÚqnormrx   r™   c                 óœ  — | j         d         }| j         d         }t          |¦  «        D ]¡}d}||k     r—d}d}	||z   |k     rN|||f         |||z   |f         k    r5|	|||z            z  }	|dz  }||z   |k     r|||f         |||z   |f         k    °5|dk    r+|	|z  }	t          |¦  «        D ]}
|||
z   |f         }|	| ||f<   Œ||z  }||k     °—Œ¢| S )z9
    numba accelerated "actual" qnorm normalization.
    r   r   g        )rd   rM   )rž   rx   r™   r   Ún_rowsÚn_colsÚcol_ir7   ÚnÚvalr€   Úidxs               r   rb   rb   …  s,  € ð Œ[˜Œ^€FØŒ[˜Œ^€Fåv‘”ð ð ˆØˆð &ŠjˆjØˆAØˆCð
 A‘˜’Ø˜q %˜xÔ(¨J°q¸1±u¸e°|Ô,DÒDÐDàv˜a !™e”}Ñ$ØQ‘ð	 A‘˜’Ø˜q %˜xÔ(¨J°q¸1±u¸e°|Ô,DÒDÐDð 1ŠuˆuØq‘Ý˜q™œð ,ð ,AØ$ Q¨¡U¨E \Ô2CØ(+E˜#˜u˜*Ñ%Ð%à‰FˆAð) &Šjˆjøð, €Lr   )r   Nr   )r*   r+   r   )"re   rN   Ú	functoolsr   Útypingr   r   ÚnumbaÚnumpyrK   Úutilr   r   r	   r
   r   r   r   r   ÚpandasrQ   Úpandas_importÚModuleNotFoundErrorr_   Úintr“   r   Úregisterr)   Ústrr‚   r$   Újitrb   r   r   r   ú<module>r²      sÀ  ðØ 	€	€	€	Ø €€€Ø $Ð $Ð $Ð $Ð $Ð $Ø "Ð "Ð "Ð "Ð "Ð "Ð "Ð "à €€€Ø Ð Ð Ð ð	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ð 	ðØÐÐÐà€M€MøØð ð ð Ø€M€M€Mðøøøð ñ P
ð à'(Ø=AØ()ð2ð 2 ¤ð 2Ø!$ð2à#(¨¨r¬zÐ)9Ô#:ð2ð #&ð2ð !#¤ð	2ð 2ð 2ñ „Xð2ð à'(Ø=AØ()ð0ð 0 ¤ð 0Ø!$ð0à#(¨¨r¬zÐ)9Ô#:ð0ð #&ð0ð !#¤
ð	0ð 0ð 0ñ „Xð0ð ð Ø*.Øð	
ð 
ØB”L "¤*Ð,Ô-ð
àð
ð d˜BœJÐ&Ô'ð
ð ð	
ð
 
ˆrŒ|˜RœZÐ'Ô	(ð
ð 
ð 
ñ „^ð
ð6 × Ò  ¤Ñ.Ô.ð Ø*.Øð	ð ØŒlðàðð d˜BœJÐ&Ô'ðð ð	ð
 
Œðð ð ñ /Ô.ðð. $ØØðnNð nNØðnNàðnNð ðnNð ð	nNð
 ðnNð 
ðnNð nNð nNð nNð nNðf ð Ø*.Øð	
ð 
ØŒjð
àð
ð d˜BœJÐ&Ô'ð
ð ð	
ð
 
Œð
ð 
ð 
ñ „^ð
ð8 ×Ò˜RœZÑ(Ô(ð Ø&*Øð	Rð RØŒ:ðRà
ðRð $˜œ
Ð"Ô#ðRð ð	Rð
 „ZðRð Rð Rñ )Ô(ðRðj €„D 4¨tÐ4Ñ4Ô4ð'ØŒ:ð'à”
ð'ð ”
ð'ð ŒJð	'ð
 „Zð'ð 'ð 'ñ 5Ô4ð'ð 'ð 's   ´; »AÁA