
    DUf                         d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	 d dl
Z G d d          Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zi Zd Zd ZdS )    N)Path)PoolRawArrayc                   "    e Zd Zd Zd ZddZdS )TempFileHolderc                 ,    t                      | _        | S N)listtmpfiles)selfs    G/var/www/html/software/conda/lib/python3.11/site-packages/qnorm/util.py	__enter__zTempFileHolder.__enter__   s        c                     | j         D ]5}t          j                            |          rt          j        |           6d S r	   )r   ospathisfileremove)r   exc_typeexc_valexc_tbfiles        r   __exit__zTempFileHolder.__exit__   sA    M 	  	 Dw~~d##  	$	  	 r    c                    t          j                    }t          d          D ]}d                    t	          j        t          j        t          j        z   d                    }| d| | | }t          j
                            |          rmt          |                                           | j                            |           |c S d S )Nd   r      )k/)tempfile
gettempdirrangejoinrandomchoicesstringascii_uppercasedigitsr   r   existsr   touchr   append)r   prefixsuffixtmpdirirand_seqfilenames          r   get_filenamezTempFileHolder.get_filename   s    $&&s 
	  
	 Awwv5EKKK H !==6=8=V==Hw~~h''  X$$&&&$$X...
	  
	 r   N)r   r   )__name__
__module____qualname__r   r   r2    r   r   r   r      sF                          r   r   c                     ddl }t          |           }d |                    | |ddd          j        D             }d |                    | |dddg          j        D             }|||fS )	zc
    parse a csv file (memory efficient) and get the columns, index and
    delimiter from it.
    r   Nc                 ,    g | ]}t          |          S r6   str.0cols     r   
<listcomp>zparse_csv.<locals>.<listcomp>.   s.        	C  r   
   #)sepnrowscomment	index_colc                 ,    g | ]}t          |          S r6   r9   )r<   rows     r   r>   zparse_csv.<locals>.<listcomp>4   s.        	C  r   )rA   rC   rD   usecols)pandas	get_delimread_csvcolumnsindex)infilepd	delimiterrK   rL   s        r   	parse_csvrP   &   s    
 &!!I ;;	SA  
 

	  G ;;	3!aS  
 

	  E E9$$r   c                 r   ddl }d |                    | dd          j        D             }|                    |           5 }t	          |                                          dk    sJ |                                d         }|                    |d          j        }ddd           n# 1 swxY w Y   ||fS )zA
    parse a hdf file and get the columns and index from it.
    r   Nc                     g | ]}|S r6   r6   r;   s     r   r>   zparse_hdf.<locals>.<listcomp>D   s    KKKssKKKr   )startstop   rL   )rH   read_hdfrK   HDFStorelenkeysselect_columnvalues)rM   rN   rK   hdfkeyrL   s         r   	parse_hdfr^   =   s      LKbkk&kBBJKKKG	V		 7388::!####hhjjm!!#w//67 7 7 7 7 7 7 7 7 7 7 7 7 7 7 E>s   AB**B.1B.c                   	
 ddl }ddlm}  ||           }|j        

j                                        }
fdt          
j                  D             }d |D             }t          |          dk    sJ t          |          dk    r-|d         	|	                    | 	g          j
        j        }d}n*d		t          t          |j        j                            }d
}	fd|D             }||||fS )zE
    parse a parquet file and get the columns and index from it.
    r   N)ParquetFilec                 N    g | ]!}j                             |          j        "S r6   )schemacolumnname)r<   col_imetadatas     r   r>   z!parse_parquet.<locals>.<listcomp>W   s<        	u%%*  r   c                     g | ]}d |v |	S )__index_level_r6   r;   s     r   r>   z!parse_parquet.<locals>.<listcomp>[   s#    DDD#,<,C,C#,C,C,Cr   rU   )rK   Tz__non-existing-col__Fc                      g | ]
}|k    |S r6   r6   )r<   r=   rD   s     r   r>   z!parse_parquet.<locals>.<listcomp>g   s#    :::s	)9)9s)9)9)9r   )rH   pyarrow.parquetr`   rf   rb   to_arrow_schemar"   num_columnsrX   read_parquetrL   r[   r
   num_rows)rM   rN   r`   parquetrb   rK   
index_colsrL   
index_usedrD   rf   s            @@r   parse_parquetrr   L   s9    ++++++k&!!GH_,,..F   8/00  G EDDDDJz??a
:!qM	<<BI

*	U7+45566
::::g:::GE:v--r   c                 .   d |D             }t          | d          5 } |                     |                    dg|z             dz              t          | D ]%}|                     t	          ||                     &	 ddd           dS # 1 swxY w Y   dS )z-
    glue multiple csv into a single csv
    c                 ,    g | ]}t          |          S r6   
read_linesr<   r   s     r   r>   zglue_csv.<locals>.<listcomp>o        CCChZ))CCCr   wr   
N)openwriter#   zip	_glue_csv)outfileheadercolfilesrO   open_colfiles
lotsaliness         r   glue_csvr   k   s     DC(CCCM 
gs		 <winnbTF]33d:;;; }- 	< 	<JMM)J	::;;;;	<< < < < < < < < < < < < < < < < < <s   AB

BBc                    t          j        |           }|                    dgdg|j        d         dz
  z  z             }d                    |g|j        d         z            }|t	          |                                          z  }|dz   S )zv
    private function of qnorm that that can combine multiple chunks of rows and
    columns into a single table.
    z%sz%grU   rz   r   )nphstackr#   shapetupleravel)r   rO   stackfmtdatas        r   r~   r~   {   s    
 Ij!!E
..$4&EKNQ,>"??
@
@C
))SEEKN*
+
+Cu{{}}%%%D$;r   c           	      @   ddl }d |D             }t          | D ]}|                    t          j        |                    }|                    dd           d|j        _        ||_        |	                    d          }|
                    | dddd	d
           dS )-
    glue multiple hdf into a single hdf
    r   Nc                 ,    g | ]}t          |          S r6   ru   rw   s     r   r>   zglue_hdf.<locals>.<listcomp>   rx   r   Tinplacefloat32qnormatable   )r]   r+   modeformatmin_itemsize)rH   r}   	DataFramer   r   	set_indexrL   rd   rK   astypeto_hdf)r   r   r   rN   r   r   dfs          r   glue_hdfr      s     CC(CCCM=) 
 

\\")J//00
Q%%%
YYy!!
		 	 	
 	
 	
 	

 
r   c                    ddl }ddl}|j                            | |          }d |D             }t	          |                    d          j                  dv rt          j        }	nat	          |                    d          j                  dv rt          j	        }	n+t          d|                    d          j         d          t          | D ]}
|r`|                    t          j        |
                    }|                    dd	
           |                    |	          }d|j        _        na|                    t          j        |
          |	          }|                    d	          }|                    |j        d         d          }||_        |                    |j                            |                     dS )r   r   Nc                 ,    g | ]}t          |          S r6   ru   rw   s     r   r>   z glue_parquet.<locals>.<listcomp>   rx   r   rU   floatdoublezThe datatype zc is not (yet) supported. Change the dtype of the parquet file, or make an issue on the github page.Tr   dtype)drop)axis)r   )rH   rj   ro   ParquetWriterr:   fieldtyper   r   float64NotImplementedErrorr}   r   r   r   r   rL   rd   reset_indexr   rK   write_tableTablefrom_pandas)r   r   r   rq   rb   rN   pyarrowwriterr   r   r   r   s               r   glue_parquetr      s    _**7F;;FCC(CCCM
6<<??  W--
	V\\!__!	"	"x	0	0
!FLLOO0   
 
 	
 =) @ @
 	0bi
3344BLLDL)))5!!B BHMMbi
335AABT**BAQ//B
!:!:2!>!>????@ @r   c           	          dd l }t          j                    5  t          j        d           |                    | d dddd          j        j        j        j        }d d d            n# 1 swxY w Y   |S )Nr   ignoreTi  r@   )rA   iteratorrB   rC   rD   )	rH   warningscatch_warningssimplefilterrJ   _enginer   dialectrO   )r   rN   rO   s      r   rI   rI      s    		 	"	" ) )h'''KKtd$q   
 

$wy 	) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 s   AA((A,/A,c              #   D   K   | D ]}t          j        |d          V  dS )a  
    Iterate over lines of a file, multiple lines at the same time. This can be
    useful when iterating over multiple files at the same time on a slow
    filesystem (e.g. hard disks). In this case the file can be read on longer
    batches continuously so the reader does not have to switch as often.

    Args:
        file: path to file
        n: number of lines to read at a time

    Returns:
        a list with a string per line
    T)allow_pickleN)r   load)filesr   s     r   rv   rv      s?        / /gd......./ /r   c           
      \   t          t          j                            |          | j        d         | j        d         z            }t          j        ||                              | j                  }t          j        ||                     |                     t          |t          |||j        f          5 }t          j        |                    t          t          |j        d                             t          j                  j        }ddd           n# 1 swxY w Y   ||fS )zI
    private argsort function of qnorm that works with multiple cpus
    r   rU   r   )	processesinitializerinitargsN)r   r   	ctypeslibas_ctypes_typer   
frombufferreshapecopytor   r   _worker_initarraymap_worker_sortr"   int64T)_arrayncpusr   data_sharedr   pool
sorted_idxs          r   _parallel_argsortr      sG    
##E**FLOfl1o,M K =E222::6<HHDIdFMM%(())) 
 udj1
 
 
  
XHH\5A#7#788
 
 

 	               s   <ADD#&D#c                 B    | t           d<   |t           d<   |t           d<   dS )zB
    helper function to pass our reference of X to the sorter
    XX_dtypeX_shapeN)var_dict)r   r   r   s      r   r   r      s'     HSM!HY!HYr   c                     t          j        t          d         t          d                                       t          d                   }t          j        |dd| f                   S )z
    argsort a single axis
    r   r   r   r   N)r   r   r   r   argsort)r/   X_nps     r   r   r     sW     =#hy.ABBBJJ D :d111a4j!!!r   )r   r   r    r$   r&   pathlibr   multiprocessingr   r   numpyr   r   rP   r^   rr   r   r~   r   r   rI   rv   r   r   r   r   r6   r   r   <module>r      sY   				           * * * * * * * *                   4% % %.  . . .>< < < 	 	 	
 
 
0"@ "@ "@J  / / /$  2 " " "" " " " "r   