
    hM                       d dl mZ d dlZd dlZd dlmZmZmZ d dlZd dl	Z
d dlmZmZ d dlmZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z'm(Z( d dl)m*Z* es
d dl+Z,d dl-m.Z/ erd dl0m1Z1 d dl2m3Z3m4Z4m5Z5m6Z6m7Z7 d dl8m9Z9 ee:ejv                  f   Z<ddZ=d Z> G d de*ee'      Z? G d de?      Z@y)    )annotationsN)TYPE_CHECKINGCallableUnion)libmissing)pa_version_under10p1pa_version_under13p0pa_version_under16p0)find_stack_level)	is_scalarpandas_dtype)isna)ArrowStringArrayMixin)ArrowExtensionArray)BooleanDtype)Float64Dtype)
Int64Dtype)NumericDtype)BaseStringArrayStringDtype)ObjectStringArrayMixin)Sequence)	ArrayLikeDtypeScalarSelfnpt)Seriesc                 *    t         rd} t        |       y )NzCpyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray.)r	   ImportError)msgs    DD:\jyotish\venv\Lib\site-packages\pandas/core/arrays/string_arrow.py_chk_pyarrow_availabler$   A   s    S#     c                R    t          xr t        j                  j                  |       S N)r   patypesis_string_view)typs    r#   _is_string_viewr,   G   s    ##D(?(?(DDr%   c                  "    e Zd ZU dZded<   dZej                  Zded<   d+ fdZ	e
d,d- fd	       Ze
	 d.	 	 	 	 	 d/ fd       Zd0dZe
dd
dd1d       Ze
	 d.	 	 	 d1d       Zed2d       Zd3 fdZej(                  dfdZ fdZd4dZd5d6 fdZed        Zej6                  Zej8                  Zej:                  Zej<                  Zej>                  Zej@                  Z ejB                  Z!ejD                  Z"ejF                  Z#e$jJ                  Z%ejL                  Z&ejN                  Z'ejP                  Z(ejR                  Z)ejT                  Z*ejV                  Z+ejX                  Z,ejZ                  Z-ej\                  Z.ej^                  Z/ej`                  Z0ejb                  Z1ejd                  Z2ejf                  Z3ejh                  Z4ejj                  Z5e6d7d       Z7e6d8d       Z8ddej(                  df	 	 	 	 	 d9 fdZ9ddej(                  f	 	 	 	 	 	 	 d: fdZ:ddej(                  f	 	 	 	 	 	 	 d: fdZ;	 	 	 	 d;	 	 	 	 	 	 	 	 	 	 	 d< fdZ<d= fdZ=d> fd Z>d?d@ fd!Z?dAdB fd"Z@dCdDd#ZAd$ ZBd% ZCdd
d&	 	 	 	 	 dEd'ZDd5dF fd(ZE fd)ZFdGd*ZG xZHS )HArrowStringArraya  
    Extension array for string data in a ``pyarrow.ChunkedArray``.

    .. warning::

       ArrowStringArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : pyarrow.Array or pyarrow.ChunkedArray
        The array of data.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    :func:`pandas.array`
        The recommended function for creating a ArrowStringArray.
    Series.str
        The string methods are available on Series backed by
        a ArrowStringArray.

    Notes
    -----
    ArrowStringArray returns a BooleanArray for comparison methods.

    Examples
    --------
    >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string[pyarrow]")
    <ArrowStringArray>
    ['This is', 'some text', <NA>, 'data.']
    Length: 4, dtype: string
    r   _dtypepyarrowzlibmissing.NAType | float	_na_valuec                ~   t                t        |t        j                  t        j                  f      rt        j
                  j                  |j                        st        |j                        st        j
                  j                  |j                        rt        j
                  j                  |j                  j                        sRt        j
                  j                  |j                  j                        st        |j                  j                        r(t        j                  |t        j                               }t        | A  |       t#        | j$                  | j&                        | _        t        j
                  j                  | j*                  j                        st-        d      y )N)storagena_valuezHArrowStringArray requires a PyArrow (chunked) array of large_string type)r$   
isinstancer(   ArrayChunkedArrayr)   	is_stringtyper,   is_dictionary
value_typeis_large_stringpccastlarge_stringsuper__init__r   _storager1   r/   	_pa_array
ValueError)selfvalues	__class__s     r#   rA   zArrowStringArray.__init__   s    frxx9:HHv{{+v{{+&&v{{3HH&&v{{'='=>xx//0F0FG&v{{'='=> WWVR__%67F !$--$..Qxx''(;(;<$  =r%   Nc                    t         |   ||      }t        j                  j	                  |j
                        r*|(t        j                  |t        j                               }|S r'   )	r@   _box_pa_scalarr(   r)   r8   r9   r=   r>   r?   )clsvaluepa_type	pa_scalarrG   s       r#   rI   zArrowStringArray._box_pa_scalar   sK    G*5':	88inn-'/	2??+<=Ir%   Fc                    t         |   ||      }t        j                  j	                  |j
                        r*|(t        j                  |t        j                               }|S r'   )	r@   _box_pa_arrayr(   r)   r8   r9   r=   r>   r?   )rJ   rK   rL   copypa_arrayrG   s        r#   rO   zArrowStringArray._box_pa_array   sL     7(888hmm,wwx):;Hr%   c                ,    t        | j                        S )z]
        Length of this array.

        Returns
        -------
        length : int
        )lenrC   rE   s    r#   __len__zArrowStringArray.__len__   s     4>>""r%   dtyperP   c                  ddl m} t                |rAt        |t              r|dk(  s,t        |      }t        |t              r|j                  dk(  sJ t        ||      r`|j                  }|j                  }t        j                  ||d      } | t        j                  ||t        j                                     S t        |t        j                  t        j                   f      r. | t#        j$                  |t        j                                     S t        j                  ||      } | t        j                  |t        j                         d	
            S )Nr   )BaseMaskedArraystringr0   F)rP   convert_na_value)maskr9   rP   T)r9   from_pandas)pandas.core.arrays.maskedrY   r$   r5   strr   r   r3   _mask_datar   ensure_string_arrayr(   arrayr?   r6   r7   r=   r>   )rJ   scalarsrW   rP   rY   	na_valuesresults          r#   _from_sequencezArrowStringArray._from_sequence   s    = *UC0Uh5F 'Ee[1emmy6PPPg/  I]]F,,V$QVWFrxxYR__=NOPP"((BOO!<=rwww(9:;; ((t<288F):MNNr%   c                *    | j                  |||      S )NrV   )rh   )rJ   stringsrW   rP   s       r#   _from_sequence_of_stringsz*ArrowStringArray._from_sequence_of_strings   s     !!'T!BBr%   c                    | j                   S )z3
        An instance of 'string[pyarrow]'.
        )r/   rT   s    r#   rW   zArrowStringArray.dtype   s    
 {{r%   c                B   | j                   j                  t        j                  u r"|t        j                  u rt        j
                  }t        |t              s7|t        j
                  ur%t        d| dt        |      j                   d      t        | 1  ||      S )NInvalid value 'C' for dtype 'str'. Value should be a string or missing value, got '
' instead.)rW   r4   npnan
libmissingNAr5   r`   	TypeErrorr9   __name__r@   insert)rE   locitemrG   s      r#   rw   zArrowStringArray.insert   s    ::"&&(TRVV^==D$$Z]])B!$ (115d1D1D0EZQ  w~c4((r%   c                Z   |t         j                  urNt        |      sCt        |t              s3t        j                  d| dt        t                      t	        |      }| j                  j                  t        j                  u r_|t         j                  u st        |      r!|j                  d      }|j                         S |j                  |      }|j                         S |t         j                  urt        |      s|j                  |      }t               j!                  |      S )Nz$Allowing a non-bool 'na' in obj.str.z2 is deprecated and will raise in a future version.
stacklevelF)r   
no_defaultr   r5   boolwarningswarnFutureWarningr   rW   r4   rq   rr   	fill_nullto_numpyr   __from_arrow__)rE   rF   namethod_names       r#   _convert_bool_resultz%ArrowStringArray._convert_bool_result   s    S^^#DHZD=QMM6{m D6 6+-	 bB::"&&(S^^#tBx))%0 ??$$  ))"-??$$'1  ))"-~,,V44r%   c                \   t        |      rCt        |      rd}nt        |t              stt	        d| dt        |      j                   d      t        j                  |t        d      }d|t        |      <   |D ]   }|t        |t              rt	        d       t        | -  |      S )z-Maybe convert value to be pyarrow compatible.Nrn   ro   rp   TrV   z]Invalid value for dtype 'str'. Value should be a string or missing value (or array of those).)r   r   r5   r`   ru   r9   rv   rq   rd   objectr@   _maybe_convert_setitem_value)rE   rK   vrG   s      r#   r   z-ArrowStringArray._maybe_convert_setitem_value   s    UE{s+%eW -559%[5I5I4J*V 
 HHU&t<E!%E$u+	Z3%7#G   w3E::r%   c                `   |D cg c]  }t        j                  |d       c}D cg c]Y  }|j                  t        j                         t        j                         t        j
                         fv r|j                         [ }}t        |      s$t        j                  t        |       t              S t        j                  | j                  t        j                  || j                  j                              }t        j                  |t        j                        S c c}w c c}w )NT)r^   )rW   )r9   )	value_set)r(   scalarr9   rZ   nullr?   as_pyrS   rq   zerosr~   r=   is_inrC   rd   bool_)rE   rF   rK   rM   r   rg   s         r#   isinzArrowStringArray.isin  s     OUUfUbii4@fU
U	~~"))+rwwy"//:K!LL OOU 	 
 9~88CIT22NNbhhyt~~?R?R&S

 xxbhh// V
s   D&AD+Tc                   t        |      }|| j                  k(  r|r| j                         S | S t        |t              rI| j
                  j                  t        j                  |j                              }|j                  |      S t        |t        j                        rEt        j                  |t        j                        r!| j                  |t        j                        S t         | E  ||      S )N)rW   r4   r]   )r   rW   rP   r5   r   rC   r>   r(   from_numpy_dtypenumpy_dtyper   rq   
issubdtypefloatingr   rr   r@   astype)rE   rW   rP   datarG   s       r#   r   zArrowStringArray.astype"  s    U#DJJyy{"K|,>>&&r':':5;L;L'MND''--rxx(R]]5"++-N==urvv=>>w~e$~//r%   c                    t        j                  t        |       j                   dt        t                      | j                  S )NzV._data is a deprecated and will be removed in a future version, use ._pa_array insteadr{   )r   r   r9   rv   r   r   rC   rT   s    r#   rb   zArrowStringArray._data1  s?     	Dz""# $: :')		
 ~~r%   c                    t        | t        j                        xr2 | j                  t        j                  t        j
                  z   z  dk7  S Nr   )r5   rePatternflags
IGNORECASEUNICODE)pats    r#   _is_re_pattern_with_flagsz*ArrowStringArray._is_re_pattern_with_flags[  s>    
 sBJJ' Ar}}rzz9::q@	
r%   c                    | j                   }| j                  }|t        j                  z  rd}|t        j                   z  }|t        j                   z  }|||fS NF)patternr   r   r   r   )r   caser   r   s       r#   _preprocess_re_patternz'ArrowStringArray._preprocess_re_patternd  sT    ++		2== DR]]N*E #e##r%   r   c                    |s| j                  |      rt        | 	  |||||      S t        |t        j
                        r| j                  ||      \  }}}t        j                  | |||||      S r'   )r   r@   _str_containsr5   r   r   r   r   )rE   r   r   r   r   regexrG   s         r#   r   zArrowStringArray._str_containsq  so     D22377(dE2uEEc2::&#::3ECu$224dE2uUUr%   c                    |s| j                  |      rt        | 	  ||||      S t        |t        j
                        r| j                  ||      \  }}}t        j                  | ||||      S r'   )r   r@   
_str_matchr5   r   r   r   r   rE   r   r   r   r   rG   s        r#   r   zArrowStringArray._str_match  sk     D22377%c4;;c2::&#::3ECu$//c4KKr%   c                    |s| j                  |      rt        | 	  ||||      S t        |t        j
                        r| j                  ||      \  }}}t        j                  | ||||      S r'   )r   r@   _str_fullmatchr5   r   r   r   r   r   s        r#   r   zArrowStringArray._str_fullmatch  sk     D22377)#tUB??c2::&#::3ECu$33D#tUBOOr%   c           	        t        |t        j                        s9t        |      s.|r,|s*t        |t              r.d|v st        j
                  d|      t        |   ||||||      S t        j                  | ||||||      S )Nz\g<z\\\d)	r5   r   r   callabler`   searchr@   _str_replacer   )rE   r   replnr   r   r   rG   s          r#   r   zArrowStringArray._str_replace  s     sBJJ'~ 4%t^ryy$'?'K 7'T1dE5II$11#tQeU
 	
r%   c                p    t        |t              st        |   |      S t	        j                  | |      S )N)repeats)r5   intr@   _str_repeatr   )rE   r   rG   s     r#   r   zArrowStringArray._str_repeat  s0    '3'7&w//&224IIr%   c                Z    t         st        j                  | |      S t        | 	  |      S r'   )r
   r   _str_removeprefixr@   )rE   prefixrG   s     r#   r   z"ArrowStringArray._str_removeprefix  s)    #(::4HHw(00r%   c                    |rt         |   ||      S t        j                  | j                  |      }| j                  |      S r'   )r@   
_str_countr=   count_substring_regexrC   _convert_int_result)rE   r   r   rg   rG   s       r#   r   zArrowStringArray._str_count  s?    7%c511))$..#>''//r%   c                ~    t         r|dk7  r||dk(  r|t        | 	  |||      S t        j                  | |||      S r   )r
   r@   	_str_findr   )rE   substartendrG   s       r#   r   zArrowStringArray._str_find  sI     aZCOaZCK 7$S%55$..tS%EEr%   c                F   t        | j                        j                  |      \  }}t        |      dk(  r't	        j
                  dt        j                        |fS t	        j                  |j                               }|j                  t        j                  d      |fS )Nr   )r   r   )shaperW   Fr]   )
r   rC   _str_get_dummiesrS   rq   emptyint64vstackr   r   )rE   sep
dummies_palabelsdummiess        r#   r   z!ArrowStringArray._str_get_dummies  sz    0@QQRUV
Fv;!88&96AA))J//12~~bhhU~3V;;r%   c                v   | j                   j                  t        j                  u r{t	        |t
        j                        r|j                  d      }n|j                         }|j                   t        j                  k(  r|j                  t        j                        }|S t               j                  |      S )NFzero_copy_only)rW   r4   rq   rr   r5   r(   r6   r   int32r   r   r   r   rE   rg   s     r#   r   z$ArrowStringArray._convert_int_result  s|    ::"&&(&"((+>*||rxx'rxx0M|**622r%   c                    | j                   j                  t        j                  u rPt	        |t
        j                        r|j                  d      }n|j                         }|j                  dd      S t               j                  |      S )NFr   float64r]   )rW   r4   rq   rr   r5   r(   r6   r   r   r   r   r   s     r#   _convert_rank_resultz%ArrowStringArray._convert_rank_result  sh    ::"&&(&"((+>*===77~,,V44r%   skipnakeepdimsc                  | j                   j                  t        j                  u r|dv r|sTt	        j
                  | j                        }t	        j                  |t	        j                  | j                  d            }n t	        j                  | j                  d      } t        |      j                  |f||d|}|r|j                  t        j                        S |S |dv r | j                  |f||d|}nt        d| d      |dv r+t        |t         j"                        r| j%                  |      S t        |t         j"                        r t'        |       |      S |S )N)anyall r   )minmaxsumargminargmaxzCannot perform reduction 'z' with string dtype)r   r   )rW   r4   rq   rr   r=   is_nullrC   	or_kleene	not_equalr   _reducer   r   _reduce_calcru   r5   r(   r6   r   r9   )rE   namer   r   kwargsnasarrrg   s           r#   r   zArrowStringArray._reduce  s<    ::"&&(T^-Cjj0ll3T^^R(HIll4>>265(-55#h:@F }}RXX..M<<&T&&tXFXXQWXF8>QRSS''Jvrxx,H++F33)4:f%%Mr%   c                    t         |   |      }| j                  j                  t        j
                  u rC|j                  j                         }|j                  ||j                  |j                  d      S |S )N)dropnaF)indexr   rP   )r@   value_countsrW   r4   rq   rr   _valuesr   _constructorr   r   )rE   r   rg   
res_valuesrG   s       r#   r   zArrowStringArray.value_counts  sm    %V%4::"&&(002J&&&,,V[[u '   r%   c                   t        |t        t        f      rR| j                  j                  t
        j                  ur,|j                  j                  t
        j                  u rt        S t        | %  ||      }| j                  j                  t        j                  u rU|t        j                  k(  r!|j                  t        j                  d      S |j                  t        j                  d      S |S )NT)r4   F)r5   r   r   rW   r4   rs   rt   NotImplementedr@   _cmp_methodrq   rr   operatorner   r   )rE   otheroprg   rG   s       r#   r   zArrowStringArray._cmp_method  s    u0CDE

##:==8$$
5 "!$UB/::"&&(X[[ rxx$??rxx%@@r%   c                4    t        d| j                   d      )Nzbad operand type for unary +: '')ru   rW   rT   s    r#   __pos__zArrowStringArray.__pos__&  s    9$**QGHHr%   returnNoner'   )rL   pa.DataType | Noner  z	pa.Scalarr   )rL   r  rP   r~   r  zpa.Array | pa.ChunkedArray)r  r   )rW   zDtype | NonerP   r~   )r  r   )rx   r   r  r.   )rF   r   r  znpt.NDArray[np.bool_])T)rP   r~   )r   str | re.Patternr  r~   )r   z
re.Patternr   r~   r  ztuple[str, bool, int])r   r~   r   r   r   r~   )r   r  r   r~   r   r   r   zScalar | lib.NoDefault)Tr   T)r   r  r   zstr | Callabler   r   r   r~   r   r   r   r~   )r   zint | Sequence[int])r   r`   )r   )r   r`   r   r   )r   N)r   r`   r   r   r   z
int | None)|)r   r`   )r   r`   r   r~   r   r~   )r   r~   r  r   )r  r   )Irv   
__module____qualname____doc____annotations__rB   rs   rt   r1   rA   classmethodrI   rO   rU   rh   rk   propertyrW   rw   r   r}   r   r   r   r   rb   r   _str_isalnum_str_isalpha_str_isdecimal_str_isdigit_str_islower_str_isnumeric_str_isspace_str_istitle_str_isupperr   _str_map_str_startswith_str_endswith_str_pad
_str_lower
_str_upper
_str_strip_str_lstrip_str_rstrip_str_removesuffix_str_get_str_capitalize
_str_title_str_swapcase_str_slice_replace_str_len
_str_slicestaticmethodr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__)rG   s   @r#   r.   r.   P   s   'V H+5==I(82   EJ/>B	# # >BQV O O. ?DC)C8<C C
  ) /2nn$ 52;*0$0   )55L(55L*99N(55L(55L*99N(55L(55L(55L''H+;;O)77M$--H&11J&11J&11J'33K'33K-??$--H+;;O&11J)77M.AA$--H&11J
 
 
$ 
$ >>V V 	V V& %(^^LL L 	L
 #L" %(^^PP P 	P
 #P$ 

 
 	

 
 
 
4J1
0F<
35 ,0%$(;?:"Ir%   r.   c                  $    e Zd Zej                  Zy)ArrowStringArrayNumpySemanticsN)rv   r  r	  rq   rr   r1    r%   r#   r+  r+  *  s    Ir%   r+  r  )A
__future__r   r   r   typingr   r   r   r   numpyrq   pandas._libsr   r   rs   pandas.compatr	   r
   r   pandas.util._exceptionsr   pandas.core.dtypes.commonr   r   pandas.core.dtypes.missingr   'pandas.core.arrays._arrow_string_mixinsr   pandas.core.arrays.arrowr   pandas.core.arrays.booleanr   pandas.core.arrays.floatingr   pandas.core.arrays.integerr   pandas.core.arrays.numericr   pandas.core.arrays.string_r   r    pandas.core.strings.object_arrayr   r0   r(   pyarrow.computecomputer=   collections.abcr   pandas._typingr   r   r   r   r   pandasr   r`   NATypeArrowStringScalarOrNATr$   r,   r.   r+  r,  r%   r#   <module>rD     s    "  	 
   
 5 , I 8 3 4 1 3 D  (   sJ$5$556 EWI-/BO WIt%5 r%   