
    o[wef                        d dl m Z  d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZ d Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zej                            dddg          d             Zd Zd Zd Z d Z!ej                            ddddgfdd dgfg          d             Z"d Z#d  Z$ej                            d!g d"fg d#fg d$fg d%fg d&d"fg d&d%fg d&d$fg d&d%fg          d'             Z%d( Z&d) Z'd* Z(d+ Z)d, Z*d- Z+dS ).    )datetimeN)
ArrowDtype)	DataFrameIndex
MultiIndexSeries_testingc                     t          dt          j        dg|           }t          j        t
          d          5  |j                            dd            d d d            d S # 1 swxY w Y   d S )NfooBAD__barBADfoodtypezexpand must be True or Falsematch.*(BAD[_]+).*(BAD)expand)r   npnanpytestraises
ValueErrorstrextract)any_string_dtypevaluess     Alib/python3.11/site-packages/pandas/tests/strings/test_extract.py+test_extract_expand_kwarg_wrong_type_raisesr      s    %rvu5=MNNNF	z)G	H	H	H > >
/===> > > > > > > > > > > > > > > > > >s   A$$A(+A(c                 8   t          dt          j        dg|           }t          dt          j        t          j        g|           }|j                            d          }t          j        ||           |j                            dd          }t          j        ||           t          ddgt          j        t          j        gt          j        t          j        gg|           }|j                            d	d
          }t          j        ||           d S )Nr   r   r   BAD__z.*(BAD[_]+).*Tr   BADr   F)r   r   r   r   r   r   tmassert_frame_equal)r   sexpectedresults       r   test_extract_expand_kwargr'      s     "&%08HIIIA'26262:JKKKHU]]?++F&(+++U]]?4]88F&(+++
5	BFBF+bfbf-=>FV  H U]]/]>>F&(+++++    c                  @   t          dt          j        ddt          j                    dd ddg	          } | j                            dd	          }t          j        t          j        g}t          d
dg|d
dg||||||g	          }t          j	        ||           | j                            dd	          }t          d
t          j        d
t          j        t          j        t          j        d t          j        t          j        g	          }t          j
        ||           d S )NaBAD_BAD	BAD_b_BADTr             @r   Fr   BAD_r!   z.*(BAD[_]+).*BAD)r   r   r   r   todayr   r   r   r"   r#   assert_series_equal)serr&   err%   s       r   &test_extract_expand_False_mixed_objectr3   *   s    
	RV[$0@0@%qRUV C
 W__1%_@@F
&"&	B65/2BBPRTVWXXH&(+++ W__/_>>F	rvrvN H 68,,,,,r(   c                      t          g d          } d}t          j        t          |          5  | j                            dd           d d d            d S # 1 swxY w Y   d S )N)A1A2A3A4B5z,only one regex group is supported with Indexr   ([AB])([123])Fr   )r   r   r   r   r   r   )idxmsgs     r    test_extract_expand_index_raisesr=   =   s     ...
/
/C
8C	z	-	-	- 7 76667 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7s   AA Ac                 f    | g d|          }d}t          j        t          |          5  |j                            dd           d d d            n# 1 swxY w Y   t          j        t          |          5  |j                            dd           d d d            d S # 1 swxY w Y   d S )	Nr5   B2C3r   "pattern contains no capture groupsr   
[ABC][123]Fr   
(?:[AB]).*r   r   r   r   r   index_or_seriesr   s_or_idxr<   s       r   ,test_extract_expand_no_capture_groups_raisesrI   G   s@   1119IJJJH
.C 
z	-	-	- 9 9\%8889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 
z	-	-	- 9 9\%8889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9#   AAA<B&&B*-B*c                      | ddg|          }|j                             dd          } | ddgd|	          }| t          k    rt          j        ||           d S t          j        ||           d S )
Nr5   r6   r   (?P<uno>A)\dFr   Aunonamer   )r   r   r   r"   r0   assert_index_equalrG   r   rH   r&   r%   s        r   (test_extract_expand_single_capture_grouprS   T   s    d|3CDDDH\!!/%!@@FSz=MNNNH&  
vx00000
fh/////r(   c                    t          g d|           }|j                            dd          }t          t          j        t          j        t          j        g|           }t          j        ||           |j                            dd          }t          t          j        t          j        gt          j        t          j        gt          j        t          j        gg|           }t          j        ||           |j                            dd          }t          dd	t          j        g|           }t          j        ||           |j                            d
d          }t          ddgd	dgt          j        t          j        gg|           }t          j        ||           |j                            dd          }t          dd	t          j        gd|           }t          j        ||           |j                            dd          }t          ddgd	dgt          j        t          j        ggddg|           }t          j        ||           |j                            dd          }t          ddgd	dgt          j        t          j        ggddg|           }t          j        ||           |j                            dd          }t          dd	t          j        g|           }t          j        ||           t          g d|           }|j                            dd          }t          ddgd	dgt          j        t          j        gg|           }t          j        ||           t          g d|           }|j                            dd          }t          ddgd	dgt          j        dggddg|           }t          j        ||           t          g d|           }|j                            dd          }t          ddgd	dgdt          j        ggddg|           }t          j        ||           d S )Nr?   r   (_)Fr   (_)(_)([AB])[123]rM   Br:   12(?P<letter>[AB])letterrO   !(?P<letter>[AB])(?P<number>[123])numbercolumnsr   ([AB])(?P<number>[123])r   ([AB])(?:[123])A11B22C33([AB])([123])(?:[123])r5   r@   3"(?P<letter>[AB])?(?P<number>[123])ri   r5   r@   C#(?P<letter>[ABC])(?P<number>[123])?rl   )	r   r   r   r   r   r"   r0   r   r#   r   r$   r&   r%   s       r   "test_extract_expand_capture_groupsro   `   s^   !!!)9:::AU]]5]//Frvrvrv.6FGGGH68,,, U]]8E]22F
&"&	BFBF+bfbf-=>FV  H &(+++ U]]=]77FsC(0@AAAH68,,, U]]?5]99F
sc3Z"&"&!12:J  H &(+++ U]]-e]<<FsC(x?OPPPH68,,, U]]>u]MMF
sc3Z"&"&!128$  H
 &(+++ U]]4U]CCF
sc3Z"&"&!12H  H
 &(+++ U]],U];;FsC(0@AAAH68,,, 	$$$,<===AU]]3E]BBF
sc3Z"&"&!12:J  H &(+++ 	   (8999AU]]?]NNF
sc3Z"&#/8$  H
 &(+++ 	   (8999AU]]@]OOF
sc3Z#rv/8$  H
 &(+++++r(   c                    g d}t          |           dk    rt          j        d           t          |           t          |          k     r5|                     d          } t          |           t          |          k     5| d t          |                   } t	          || |          }|j                            dd          }t	          d	d
t          j        g| |          }t          j
        ||           |j                            dd          }t          dd	gdd
gdt          j        ggddg| |          }t          j        ||           d S )Nrk   r   zTest requires len(index) > 0   indexr   (\d)Fr   rY   rZ   (?P<letter>\D)(?P<number>\d)?rM   rX   rl   r\   r^   r`   rs   r   )lenr   skiprepeatr   r   r   r   r   r"   r0   r   r#   )rs   r   datar1   r&   r%   s         r   (test_extract_expand_capture_groups_indexr{      sN    D
5zzQ2333
e**s4yy
 
 Q e**s4yy
 
  +CII+E
U*:
;
;
;CW__WU_33FsC(=MNNNH68,,,W__=e_LLF
sc3Z#rv/8$	  H &(+++++r(   c                     t          g dd|           }|j                            dd          }t          g dd|           }t          j        ||           d S )	Na3b3c2bobrO   z(?P<sue>[a-z])Fr   abcsue)r   r   r   r"   r0   rn   s       r   ,test_extract_single_series_name_is_preservedr      sh    !!!5EFFFAU]],U];;FoooE9IJJJH68,,,,,r(   c                 (   t          dt          j        dg|           }|j                            dd          }t          ddgt          j        t          j        gt          j        t          j        gg|           }t          j        ||           d S )	Nr   r   r   r   Tr   r    r!   )r   r   r   r   r   r   r"   r#   rn   s       r   test_extract_expand_Truer      s     "&%08HIIIAU]]/]==F
5	BFBF+bfbf-=>FV  H &(+++++r(   c                  6   t           j        t           j        g} t          dt           j        ddt          j                    dd ddg	          }|j                            dd          }t          d	d
g| d	d
g| | | | | | g	          }t          j	        ||           d S )Nr*   r+   Tr   r,   r-   r   r   r.   r!   )
r   r   r   r   r/   r   r   r   r"   r#   )r2   mixedr&   r%   s       r   %test_extract_expand_True_mixed_objectr      s    
&"&	BFN
	
 E Y3DAAF65/2BBPRTVWXXH&(+++++r(   c                 f    | g d|          }d}t          j        t          |          5  |j                            dd           d d d            n# 1 swxY w Y   t          j        t          |          5  |j                            dd           d d d            d S # 1 swxY w Y   d S )	Nr?   r   rB   r   rC   Tr   rD   rE   rF   s       r   4test_extract_expand_True_single_capture_group_raisesr      s@   
 1119IJJJH
.C	z	-	-	- 8 8\$7778 8 8 8 8 8 8 8 8 8 8 8 8 8 8 
z	-	-	- 8 8\$7778 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8rJ   c                      | ddg|          }|j                             dd          }t          dddgi|          }t          j        ||           d S )	Nr5   r6   r   rL   Tr   rN   rM   )r   r   r   r"   r#   rR   s        r   -test_extract_expand_True_single_capture_groupr     sj    d|3CDDDH\!!/$!??F%#s,4DEEEH&(+++++r(   rP   series_namec                 &   t          g d| |          }|j                            dd          }t          t          j        t          j        t          j        g|          }t          j        ||           |j                            dd          }t          t          j        t          j        gt          j        t          j        gt          j        t          j        gg|          }t          j        ||           |j                            dd          }t          d	d
t          j        g|          }t          j        ||           |j                            dd          }t          d	dgd
dgt          j        t          j        gg|          }t          j        ||           |j                            dd          }t          dd	d
t          j        gi|          }t          j        ||           |j                            dd          }t          d	dgd
dgt          j        t          j        ggddg|          }t          j        ||           |j                            dd          }t          d	dgd
dgt          j        t          j        ggddg|          }t          j        ||           |j                            dd          }t          d	d
t          j        g|          }t          j        ||           d S )Nr?   rO   rU   Tr   r   rV   rW   rM   rX   r:   rY   rZ   r[   r\   r]   r^   r_   ra   r   rb   r   r   r   r   r   r   r"   r#   )rP   r   r$   r&   r%   s        r   test_extract_seriesr     s    	!!!4DEEEA U]]5]..F"&"&"&19IJJJH&(+++ U]]8D]11F
&"&	BFBF+bfbf-=>FV  H &(+++ U]]=]66F#sBF+3CDDDH&(+++ U]]?4]88F
sc3Z"&"&!12:J  H &(+++ U]]-d];;F(S#rv$67?OPPPH&(+++ U]]>t]LLF
sc3Z"&"&!128$  H
 &(+++ U]]4T]BBF
sc3Z"&"&!12H  H
 &(+++ U]],T]::F#sBF+3CDDDH&(+++++r(   c                    t          g d|           }|j                            dd          }t          ddgdd	gt          j        t          j        gg|           }t          j        ||           t          g d
|           }|j                            dd          }t          ddgdd	gt          j        dggddg|           }t          j        ||           t          g d|           }|j                            dd          }t          ddgdd	gdt          j        ggddg|           }t          j        ||           d S )Nrc   r   rg   Tr   rM   rY   rX   rZ   rh   rj   ri   r\   r^   r_   rk   rm   rl   r   rn   s       r   test_extract_optional_groupsr   F  s   $$$,<===AU]]3D]AAF
sc3Z"&"&!12:J  H &(+++ 	   (8999AU]]?]MMF
sc3Z"&#/8$  H
 &(+++ 	   (8999AU]]@]NNF
sc3Z#rv/8$  H
 &(+++++r(   c                    g d}t          |           t          |          k     rt          j        d           | d t          |                   } t          || |          }|j                            dd          }t          ddt          j        g| |          }t          j
        ||           |j                            d	d          }t          d
dgddgdt          j        ggddg| |          }t          j
        ||           d S )Nrk   zIndex too shortrr   rt   Tr   rY   rZ   ru   rM   rX   rl   r\   r^   rv   )rw   r   rx   r   r   r   r   r   r   r"   r#   )rs   r   rz   r$   r&   r%   s         r   +test_extract_dataframe_capture_groups_indexr   d  s    D
5zzCII%&&&+CII+Et5(8999AU]]74]00F#sBF+5@PQQQH&(+++U]];D]IIF
sc3Z#rv/8$	  H &(+++++r(   c                     t          g dd|           }|j                            dd          }t          dg di| 	          }t	          j        ||           d S )
Nr}   r   rO   (?P<letter>[a-z])Tr   r\   r   r   )r   r   r   r   r"   r#   rn   s       r   'test_extract_single_group_returns_framer   ~  sm     	!!!=MNNNAU]]/]==F(OOO4<LMMMH&(+++++r(   c                 0   dddddt           j        dg}g d}d}g d	}t          || 
          }t          j        g dd          }t          |||| 
          }|j                            |t          j	                  }t          j        ||           t          j        g d          }	t          ||	|           }t          j        g dd          }t          |||| 
          }|j                            |t          j	                  }t          j        ||           t          ||	|           }d|j        _        d|_        t          |||| 
          }|j                            |t          j	                  }t          j        ||           d S )Nzdave@google.comztdhock5@gmail.comzmaudelaperriere@gmail.comz'rob@gmail.com some text steve@gmail.comz%a@b.com some text c@d.com and e@f.com ))davegooglecom)tdhock5gmailr   )maudelaperrierer   r   )robr   r   )stever   r   )r   r   r   )r   dr   )efr   zY
    (?P<user>[a-z0-9]+)
    @
    (?P<domain>[a-z]+)
    \.
    (?P<tld>[a-z]{2,4})
    )userdomaintldr   )r   r   r,   r   rq   r   )   r   )r   r,   )   r   )r   r,   )r   rq   Nr   names)flags))singleDave)r   Toby)r   Maude)multiplerobAndSteve)r   abcdef)nonemissing)r   emptyrr   ))r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r,   )r   r   r   )r   r   r,   )r   r   rq   )NNr   )matchesdescription)r   r   r   )r   r   r   r   from_tuplesr   r   
extractallreVERBOSEr"   r#   rs   r   )
r   rz   expected_tuplespatexpected_columnsr$   expected_indexr%   r&   mis
             r   test_extractallr     s   #1/

D	 	 	OC 100t+,,,A  +HHH  N )9AQ  H Uc44F&(+++ 
		
 	
 	


 

B 	t2%5666A+		
 		
 		
 $  N )9AQ  H Uc44F&(+++ 	t2%5666A.AGM>N)9AQ  H Uc44F&(+++++r(   zpat,expected_namesrj   r\   r^   z([AB])?(?P<number>[123])c                    t          g d|          }|j                            |           }t          dt          j        dft          j        dfgt          j        g dd          ||	          }t          j	        ||           d S )
N)r   r5   32r   )rM   rY   ri   rZ   )r   r   rq   r,   r   r   )rs   r`   r   )
r   r   r   r   r   r   r   r   r"   r#   )r   expected_namesr   r$   r&   r%   s         r   test_extractall_column_namesr     s     	'7888AUc""F	bfc]RVSM2$%=%=%=_UUU	  H &(+++++r(   c                 n   t          g dd|           }t          j        g dd          }|j                            d          }t          dg d	i|| 
          }t          j        ||           |j                            d          }t          g d	|| 
          }t          j        ||           d S )Nr~   r   d4c2r   rO   r   r   r   r   r   r   r   r\   )r   r   r   r   rr   ([a-z]))r   r   r   r   r   r   r"   r#   )r   r$   r   r&   r%   s        r   test_extractall_single_groupr     s    ###-?OPPPA+(((  N
 U233F	'''(FV  H &(+++ Uj))FN:J  H &(+++++r(   c                     t          g dd|           }|j                            d          }t          g dt	          j        g dd          | 	          }t          j        ||           d S )
N)ab3abc3d4cd2r   rO   z([a-z]+))ababcr   cdr   r   r   rr   )r   r   r   r   r   r   r"   r#   rn   s       r   ,test_extractall_single_group_with_quantifierr     s     	'''mCSTTTAUk**F   $,,,O
 
 
   H &(+++++r(   zdata, names)N)i1)Ni2)r   r   r   c                   	 t          |           	t          |          dk    r%t          t          	          |d                   }n1	fdt          	          D             }t          j        ||          }t          | d||          }t          j        g |dz             }|j                            d	          }t          dg||
          }t          j
        ||           |j                            d          }t          ddg||
          }t          j
        ||           |j                            d          }t          dg||
          }t          j
        ||           |j                            d          }t          ddg||
          }t          j
        ||           |j                            d          }t          ddg||
          }t          j
        ||           d S )Nr,   r   rP   c              3   D   K   | ]}t          |gd z
  z            V  dS )r,   N)tuple).0ins     r   	<genexpr>z-test_extractall_no_matches.<locals>.<genexpr>+  s5      991%q1u&&999999r(   r   r   rP   rs   r   r   z(z)rv   z(z)(z)z(?P<first>z)firstz(?P<first>z)(?P<second>z)secondz(z)(?P<second>z))rw   r   ranger   r   r   r   r   r   r"   r#   )
rz   r   r   rs   tuplesr$   r   r&   r%   r   s
            @r   test_extractall_no_matchesr     s    	D		A
5zzQeAhhU1X...9999a999&vU;;;t-u<LMMMA+Buz7IKKKN Ue$$F!NBRSSSH&(+++ Uh''F!Q~EUVVVH&(+++ Un--F	7G  H &(+++ U9::F(#>AQ  H &(+++ U011FH^;K  H &(+++++r(   c                    t          g dd|           }|j                            d          }t          dg dit	          j        g dd dg	          | 
          }t          j        ||           | dk    rVt          g d          t          g dd          fD ]1}|j                            d          }t          j        ||           2t          g ddt          g dd          |           }|j                            d          }t          dg dit	          j        g dddg	          | 
          }t          j        ||           d S )N)a1a2b1c1xxxrO   z[ab](?P<digit>\d)digit)rY   rZ   rY   )r   )r   r,   r   r   r   rr   objectr   s_name)XXyyzzidx_namer   ))r   r   )r   r,   )r   r   )	r   r   r   r   r   r   r"   r#   r   )r   r$   r&   r%   r;   s        r   test_extractall_stringindexr   P  s   ###%7GHHHAU233F	///"$%=%=%=dG_UUU  H
 &(+++ 8##&&&''&&&U333
 	4 	4C W''(<==F!&(3333&&&Z888		 	 	A U233F	///"$---j'5J
 
 
   H &(+++++r(   c                     t          g dd|           }t          j        t          d          5  |j                            d           d d d            d S # 1 swxY w Y   d S )Nr   r   rO   zno capture groupsr   z[a-z])r   r   r   r   r   r   )r   r$   s     r   (test_extractall_no_capture_groups_raisesr   u  s     	###-?OPPPA	z)<	=	=	= # #	"""# # # # # # # # # # # # # # # # # #s   AAAc                  Z   t          g dg dd          } | j        j                            dd          }t	          g d          }t          j        ||           | j        j                            d	d          }g d
}t	          |ddg          }t          j        ||           d S )Nr   )r7   B3D4r   )rs   rP   z([A-Z])Tr   )rM   rX   Dz!(?P<letter>[A-Z])(?P<digit>[0-9])))rM   ri   )rX   ri   )r   4r\   r   )r`   )r   rs   r   r   r   r"   r#   )r$   rr   e_lists       r   !test_extract_index_one_two_groupsr  }  s    ###+=+=+=MRRRA	Jt44A///""A!Q
 	
@NNA111F&8W"5666A!Qr(   c                 N   t          g dd|           }d}|j                            |d          }|j                            |          }|                    dd	          }t          j        ||           d
}|j                            |d          }|j                            |          }|                    dd	          }t          j        ||           d}|j                            |d          }	|j                            |          }|                    dd	          }t          j        |	|           d}
|j                            |
d          }|j                            |
          }|                    dd	          }t          j        ||           d S )Nr}   r   rO   ([a-z])([0-9])Tr   r   r   level!(?P<letter>[a-z])(?P<digit>[0-9])(?P<group_name>[a-z])r   )r   r   r   r   xsr"   r#   )r   r$   pattern_two_nonameextract_two_nonamehas_multi_indexno_multi_indexpattern_two_namedextract_two_namedpattern_one_namedextract_one_namedpattern_one_nonameextract_one_nonames               r   test_extractall_same_as_extractr    s   !!!=MNNNA*'9$GGe&&'9::O$'''99N,n===<&7EEe&&'899O$'''99N+^<<<0&7EEe&&'899O$'''99N+^<<<#'9$GGe&&'9::O$'''99N,n=====r(   c                    t          j        g dd          }t          g d|d|           }d}|j                            |d	          }|j                            |          }|                    d
d          }t          j        ||           d}|j                            |d	          }|j                            |          }|                    d
d          }t          j        ||           d}	|j                            |	d	          }
|j                            |	          }|                    d
d          }t          j        |
|           d}|j                            |d	          }|j                            |          }|                    d
d          }t          j        ||           d S )N))rM   r   )rX   r   )rl   third)capitalordinalr   r}   r   )rs   rP   r   r  Tr   r   r   r  r	  r
  r   )	r   r   r   r   r   r   r  r"   r#   )r   r   r$   r  r  has_match_indexno_match_indexr  r  r  r  r  r  s                r   -test_extractall_same_as_extract_subject_indexr    s   		999$
 
 
B 	!!!-GWXXXA*'9$GGe&&'9::O$'''99N,n===<&7EEe&&'899O$'''99N+^<<<0&7EEe&&'899O$'''99N+^<<<#'9$GGe&&'9::O$'''99N,n=====r(   c                      t          j        d          } t          ddgt          |                                                     j                            d          }|j        d         dk    sJ d S )Npyarrowr   r   r   z(ab)r   zstring[pyarrow])r   importorskipr   r   stringr   r   dtypes)par&   s     r   test_extractall_preserves_dtyper$    sk     
	Y	'	'BUDMBIIKK)@)@AAAEPPQWXXF=0000000r(   ),r   r   numpyr   r   pandas.core.dtypes.dtypesr   pandasr   r   r   r   r	   r"   r   r'   r3   r=   rI   rS   ro   r{   r   r   r   r   r   markparametrizer   r   r   r   r   r   r   r   r   r   r   r  r  r  r$   r(   r   <module>r+     s.         				      0 0 0 0 0 0             > > >, , ,"- - -&7 7 7
9 
9 
9	0 	0 	0P, P, P,f, , ,6- - -, , ,, , ,*8 8 8, , , $!6776, 6, 876,r, , ,<, , ,4, , ,R, R, R,j  
.(/CD	#a]3	 
, 
, 
,, , ,*, , ,  	W	W	\	\			w'			|,			|,			|,	 (, (, (,V", ", ",J# # #     > > >8> > >B1 1 1 1 1r(   