
    DUfd<                     $   d dl Z d dlZd dlmZ d dlmZmZ d dlZd dlZ	ddl
mZmZmZmZmZmZmZmZ 	 d dlZdZn# e$ r dZY nw xY were	 	 	 d dej        d	ed
ede	j        f         dedej        f
d            Ze	 	 	 d de	j        d	ed
ede	j        f         dede	j        f
d            Ze	 	 	 d deej        e	j        f         d	ed
ede	j        f         dedeej        e	j        f         f
d            Ze                    ej                  	 	 	 d dej        d	ed
ede	j        f         dedej        f
d            Z	 	 	 d!dedededededdfdZn1e	 	 	 d de	j        d	ed
ede	j        f         dede	j        f
d            Ze                    e	j                  	 	 	 d de	j        d	ed
ede	j        f         dede	j        f
d            Z ej         ddd          de	j        de	j        de	j        d
e	j        de	j        f
d            Z!dS )"    N)singledispatch)overloadUnion   )TempFileHolderglue_csvglue_hdfglue_parquet	parse_csv	parse_hdfparse_parquet_parallel_argsortTFdataaxistargetncpusreturnc                     d S N r   r   r   r   s       U/var/www/html/software/conda/lib/python3.11/site-packages/qnorm/quantile_normalize.pyquantile_normalizer      s	    
 /2c    c                     d S r   r   r   s       r   r   r   &   s	    
 -0Cr   c                 @    t          dt          |                      )a  
        Quantile normalize your array/dataframe.

        It does quantile normalization in the "correct" way in the sense that
        it takes the mean of duplicate values instead of ignoring them.

        Args:
            data: numpy.ndarray or pandas.DataFrame to be normalized
            axis: axis along to normalize. Axis=1 (default) normalizes each
                  column/sample which gives them identical distributions.
                  Axis=0 normalizes each row/feature giving them all identical
                  distributions.
            target: distribution to normalize onto
            ncpus: number of cpus to use for normalization

        Returns: a quantile normalized copy of the input.
        ,quantile_normalize not implemented for type NotImplementedErrortyper   s       r   r   r   .   '    0 "G4::GG
 
 	
r   c                    |                                  }|dk    r5t          |j                            t                    |||          |d d <   n4t          |j                            t                    |||          |d d <   |S )Nr   )copyquantile_normalize_npvaluesastypefloat)r   r   r   r   qn_datas        r   quantile_normalize_pdr)   J   s     ))++ 199.%%e,,dFE GAAAJJ /%%e,,dFE GAAAJ r   順    infileoutfilerowchunksizecolchunksizec                   !"# |                      d          rd}t          |           \  !}nf|                      d          rd}t          |           \  !}}n;|                      d          rd}t          |           \  !}}}	nt	          d          t          !          }
t          |          }g }g }g }t          j        |          }t                      5 }t          t          j        |
|z                      D ]}||z  t          j        |dz   |z  d	|
          }}|dk    rt          j        |           5 "t          "                                          dk    sJ "                                d	         #!"#fd
t          ||          D             }t          j        |d                              d          }ddd           n# 1 swxY w Y   n{|dk    rQt          j        | |dd	d	gt'          t          |dz   |dz                                                     d          }n$|dk    rt          j        | !||                   }t+          |j        ||j        j                  \  }}~t          j        ||d	          }t          j        |d          }|||z
  ||z
  |z  z  z  }|                    |                    dd                     |                    |                    dd                     |                    |                    dd                     t          j        |d         |           t          j        |d         |           t          j        |d         |           ~~~g }g }t          j        |t          j        t          |          |z                      D ][}|                    |                    dd                     t          j        |                              |d         d           \|                    |           ~t          t          j        |
|z                      D ]}t          j         ||         d          }t          j         ||         d          }t          j         ||         d          }tC          ||||          }~~~g }tE          t          j        |t          j        |j#        d	         |z                                D ]M\  }}|                    d| d| dd          } |                    |            t          j        | |           N~~|                    |           tH          j%        &                    |          rtI          j'        |           |dk    rtQ          |!|           n2|dk    rtS          |!||           n|dk    rtU          |!|||	           ddd           dS # 1 swxY w Y   dS )a  
        Memory-efficient quantile normalization implementation by splitting
        the task into sequential subtasks, and writing the intermediate results
        to disk instead of keeping them in memory. This makes the memory
        footprint independent of the input table, however also slower..

        Args:
            infile: path to input table. The table can be either a csv-like file
                of which the delimiter is auto detected. Or the infile can be a
                hdf file, which requires to be stored with format=table.
            outfile: path to the output table. Has the same layout and delimiter
                as the input file. If the input is csv-like, the output is csv-
                like. If the input is hdf, then the output is hdf.
            rowchunksize: how many rows to read/write at the same time when
                combining intermediate results. More is faster, but also uses
                more memory.
            colchunksize: how many columns to use at the same time when
                calculating the mean and normalizing. More is faster, but also
                uses more memory.
            ncpus: The number of cpus to use. Scales diminishingly, and more
                than four is generally not useful.
        )z.hdfz.h5hdf)z.csvz.tsvz.txtcsvz.parquetparquetzhOnly HDF ('.hdf', '.h5'), text ('.csv', '.tsv', '.txt'), and parquet ('.parquet') formats are supported.r   r   c                 H    g | ]}                     |                   S r   )select_column).0icolumnsr1   keys     r   
<listcomp>z2incremental_quantile_normalize.<locals>.<listcomp>   s=           !  --c71:>>     r   r   float32N#)sepcomment	index_colusecols)r8   qnorm_z.npy)prefixsuffixz.p)compressionT)allow_pickle_)+endswithr   r   r   r   lennpzerosr   rangemathceilclippdHDFStorekeysconcatr&   read_csvlistread_parquetr   r%   dtypetake_along_axismeanappendget_filenamesavearray_split	DataFrame	to_pickleload_numba_accel_qnorm	enumerateshapeospathexistsremover	   r   r
   )$r,   r-   r.   r/   r   
dataformatindex	delimiter
index_usedschemanr_colsnr_rowstmp_valstmp_sorted_valstmp_idxsr   tfhr7   	col_startcol_endcolsdfr   
sorted_idxsorted_vals	rankmeans	qnorm_tmpindex_tmpfileschunkqnormedcol_tmpfilesjtmpfiler8   r1   r9   s$                                    @@@r   incremental_quantile_normalizer   _   s|   : ???++ 	J&v..NGUU__566 	J(1&(9(9%GUII__j** 	"J1>v1F1F.GUJ%B   g,,e**  '"" u	N49W|%;<<== :2 :2$GQUl2Aw?? #	
 &&V,, G"388::!3333!hhjjm           %*9g%>%>       Yt!444;;IFFG G G G G G G G G G G G G G G  5((% #"#!" MT%	Aw{*K*K%L%L M   fY'' B  9,,	'0A(B  B
 $5Iubio$ $ j  0  
 GKa888	 9v-y(W5 
 $$HV$DD    &&$$HV$DD   $$HV$DD   d++++[999j111*kk I  NtyUl!:;;    %%$$HT$BB   U##--"2&D .     ^,,, 49W|%;<<== / /wx{>>>WXa[tDDD
 goa&8tLLL -*k6  *k  " )N7=+;l+J!K!K ! ! 	, 	,HAu
 "..000A000 /  G !''000GGU++++U  ....w~~g&& #	'""" U""'95555u$$'9i@@@@y((Wgy*fMMMku	N u	N u	N u	N u	N u	N u	N u	N u	N u	N u	N u	N u	N u	N u	N u	N u	N u	Ns9   A#X=B	GXGXGPXXXc                 @    t          dt          |                      )a  
        Quantile normalize your array.

        It does quantile normalization in the "correct" way in the sense that
        it takes the mean of duplicate values instead of ignoring them.

        Args:
            data: numpy.ndarray or pandas.DataFrame to be normalized
            axis: axis along to normalize. Axis=1 (default) normalizes each
                  column/sample which gives them identical distributions.
                  Axis=0 normalizes each row/feature giving them all identical
                  distributions.
            target: distribution to normalize onto
            ncpus: number of cpus to use for normalization

        Returns: a quantile normalized copy of the input.
        r   r   r   s       r   r   r     r!   r   _datac                     t          j         j        t           j                  st	          d j         d          t           fdt           j        t           j        fD                       rt           j        }nt           j        }|dk    rt          j	                    n|dk    rnt	          d| d          |dk    r- 
                    |          }t          j        |d	          }n*|dk    rt           ||          \  }}nt	          d
          t          j        ||d	          }|t          j        |d	          }nt          |t           j                  s3	 t          j        |          }n# t$          $ r t	          d          w xY w|j        dk    rt	          d|j         d          |j        d         |j        d         k    r,t	          d|j        d          d|j        d          d          t          j        |j        t           j                  st	          d|j         d          t          j        |
                    |                    }t-          ||||          }|dk    r|j        }|S )NzThe type of your data (z[) is is not supported, and might lead to undefined behaviour. Please use numeric data only.c              3   L   K   | ]}t          j        j        |          V  d S r   )rK   
issubdtyperX   )r6   rX   r   s     r   	<genexpr>z(quantile_normalize_np.<locals>.<genexpr>>  sB        .3ek5))     r   r   r   z`qnorm only supports 2 dimensional data, so the axishas to be either 0 or 1, but you set axis to .)rX   r;   z2The number of cpus needs to be a positive integer.z5The target could not be converted to a numpy.ndarray.zWThe target array should be a 1-dimensionsal vector, however you supplied a vector with z dimensionsz=The target array does not contain the same amount of values (z) as the data contains rows ()zThe type of your target ()rK   r   rX   number
ValueErroranyint32r<   float64	transposer&   argsortr   rY   rZ   
isinstancendarrayarray	Exceptionndimrd   sortrb   T)	r   r   r   r   rX   r   rx   
sorted_val	final_ress	   `        r   r$   r$   /  s    =bi00 -ek - - -
 
 	
 
    8:"*7M   
 
  

 qyyU##	  
 
 	
 zz||%|(( Z1---

	,UE5AAjjMNNN#D*1===J~!,,, &"*-- 	&))    N   ;!G.4kG G G   <?djm++%LO% %JqM% % %  
 }V\2955 	1DJ 1 1 1  
 U3344"4ZHHIqyyK	s   &E; ;F)nopythonfastmathcacheqnormrx   r   c                    | j         d         }| j         d         }t          |          D ]}d}||k     rd}d}	||z   |k     rN|||f         |||z   |f         k    r5|	|||z            z  }	|dz  }||z   |k     r|||f         |||z   |f         k    5|dk    r+|	|z  }	t          |          D ]}
|||
z   |f         }|	| ||f<   ||z  }||k     | S )z9
    numba accelerated "actual" qnorm normalization.
    r   r   g        )rd   rM   )r   rx   r   r   n_rowsn_colscol_ir7   nvalr   idxs               r   rb   rb     s,    [^F[^Fv   &jjAC
 Aq%x(Jq1ue|,DDDva!e}$Q	 Aq%x(Jq1ue|,DDD 1uuqq , ,A$QUE\2C(+E#u*%%FA) &jj, Lr   )r   Nr   )r*   r+   r   )"re   rN   	functoolsr   typingr   r   numbanumpyrK   utilr   r   r	   r
   r   r   r   r   pandasrQ   pandas_importModuleNotFoundErrorr_   intr   r   registerr)   strr   r$   jitrb   r   r   r   <module>r      s   				  $ $ $ $ $ $ " " " " " " " "     	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	MM   MMM  P
 '(=A()2 2 2!$2#(rz)9#:2 #&2 !#	2 2 2 X2 '(=A()0 0 0!$0#(rz)9#:0 #&0 !#
	0 0 0 X0  *.	
 
BL"*,-

 dBJ&'
 	

 
r|RZ'	(
 
 
 ^
6   .. *.	 l dBJ&' 	
 
   /.. $nN nNnNnN nN 	nN
 nN 
nN nN nN nN nNf  *.	
 
j

 dBJ&'
 	

 

 
 
 ^
8 RZ(( &*	R R:R
R $
"#R 	R
 ZR R R )(Rj D4t444':'
' 
' J	'
 Z' ' ' 54' ' 's   ; AA