
    6h
Z              	      B   % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKJrJrJ	r	  SSK
JrJrJr  SSKJr  SSKJr  \	" SS	S
9r\R*                  " S5      rS\S'   \R.                  r\R0                  r\R2                  r\R4                  r\R8                  r\rSRA                  5        V s/ s H!  n \" \!" U S5      5      RE                  S5      PM#     sn r#\$" S \# 5       5      (       d   eSAS jr%\RL                  r'\'(       a3  \RP                  r)\RT                  S:X  a  SBS jr+\+\)RX                  l-        ON\RR                  R]                  5        VVs0 s H&  u  pURE                  S5      URE                  S5      _M(     snnr)SSS.r/\R`                  (       a  S\/S'   Sr1S\S'    \)R[                  SS5      r1\1(       d;  \Rd                  " 5       RE                  S5      =(       d    Sr1\/R[                  \1\15      r1\)R[                  SS5      r4S\S'   S r5 " S! S	\65      r7 " S" S#\65      r8SAS$ jr9SAS% jr:SCS& jr;SDS' jr<SES( jr=\;r>\<r?\=r@SAS) jrASAS* jrBSFS+ jrC\'(       dv  \R`                  (       a   " S, S-\D5      rE\E" 5       r)\RR                  R]                  5        H5  u  rFrG\9" \GRE                  S5      5      \)\9" \FRE                  S5      5      '   M7     \R                  " S.5      rI\R`                  (       a  S/ rJO\R                  rJ\" \)R[                  S0S15      S2:H  =(       a    S3=(       d    S45      rLSGS5 jrMSHS6 jrNSIS7 jrO  SJ         SKS8 jjrP " S9 S:5      rQSLSMS; jjrRS<rS/ S=QrTSNS> jrUSAS? jrVSAS@ jrWgs  sn f s  snnf ! \Rf                   a    Sr1 GNf = f)O    )annotationsN)AnyCallableTypeVar   )errorpolicypycompat)modules)
charencode
_Tlocalstrlocalstr)boundr   zintmod.CharEncodingsO   200c 200d 200e 200f 202a 202b 202c 202d 202e 206a 206b 206c 206d 206e 206f feff   utf-8c              #  B   #    U  H  oR                  S 5      v   M     g7f))      N)
startswith).0is     4/usr/lib/python3/dist-packages/mercurial/encoding.py	<genexpr>r   6   s     
=W<<*++Ws   bytesc                \    SU ;   d  SU ;   a  [          H  nU R                  US5      n M     U $ )u   Remove codepoints ignored by HFS+ from s.

>>> hfsignoreclean(u'.h‌g'.encode('utf-8'))
'.hg'
>>> hfsignoreclean(u'.h﻿g'.encode('utf-8'))
'.hg'
r   r       )_ignorereplace)scs     r   hfsignorecleanr!   9   s0     !|w!|A		!S!A Hr   s   OpenVMSc                    SS K nUR                  US 5      n[        U[        5      (       a  Uc  U$ UR	                  S5      $ Uc  U$ U$ )Nr   zlatin-1)_deccgetenv
isinstancer   encode)selfkeydefaultr#   vs        r   newgetr+   O   sI    S$'A#u%%"#)wD)1DD"#)w22r   s   ascii)s   646s   ANSI_X3.4-1968s   utf-8s   cp65001r   encodings
   HGENCODINGasciis   HGENCODINGMODEs   strictencodingmodes
   ISO-8859-1c                  ^   ^  \ rS rSrSrS r\R                  (       a  SU 4S jjrS r	Sr
U =r$ )r   x   z`This class allows strings that are unmodified to be
round-tripped to the local encoding and backc                <    [         R                  X5      nXl        U$ N)r   __new___utf8)clsulr   s       r   r3   localstr.__new__|   s    MM#!r   c                0   > [         TU ]  U5        Xl        g r2   )super__init__r4   )r'   r6   r7   	__class__s      r   r;   localstr.__init__   s    GQJr   c                ,    [        U R                  5      $ r2   )hashr4   )r'   s    r   __hash__localstr.__hash__   s    DJJr   )r4   )r6   r   r7   r   returnNone)__name__
__module____qualname____firstlineno____doc__r3   typingTYPE_CHECKINGr;   r@   __static_attributes____classcell__r<   s   @r   r   r   x   s'    4
 	   r   c                      \ rS rSrSrSrg)safelocalstr   a,  Tagged string denoting it was previously an internal UTF-8 string,
and can be converted back to UTF-8 losslessly

>>> assert safelocalstr(b'\xc3') == b'\xc3'
>>> assert b'\xc3' == safelocalstr(b'\xc3')
>>> assert b'\xc3' in {safelocalstr(b'\xc3'): 0}
>>> assert safelocalstr(b'\xc3') in {b'\xc3': 0}
 N)rD   rE   rF   rG   rH   rK   rQ   r   r   rO   rO      s    r   rO   c                :   [        U 5      (       a  U $   U R                  S5      n[        S:X  a  U $ UR                  [	        [        5      S5      nXR                  [	        [        5      5      :X  a  [        U5      $ [        X5      $ ! [         a     U R                  [	        [        5      5      nUR                  [	        [        5      S5      nXR                  [	        [        5      5      :X  a  [        U5      s $ [        UR                  S5      U5      s $ ! [         a6    U R                  SS5      nUR                  [	        [        5      S5      s s $ f = ff = f! [         a.  n[        R                  " [        R                  " U5      SS9eSnAff = f)aU  
Convert a string from internal UTF-8 to local encoding

All internal strings should be UTF-8 but some repos before the
implementation of locale support may contain latin1 or possibly
other character sets. We attempt to decode everything strictly
using UTF-8, then Latin-1, and failing that, we use UTF-8 and
replace unknown characters.

The localstr class is used to cache the known UTF-8 encoding of
strings next to their local representation to allow lossless
round-trip conversion back to UTF-8.

>>> u = b'foo: \xc3\xa4' # utf-8
>>> l = tolocal(u)
>>> l
'foo: ?'
>>> fromlocal(l)
'foo: \xc3\xa4'
>>> u2 = b'foo: \xc3\xa1'
>>> d = { l: 1, tolocal(u2): 2 }
>>> len(d) # no collision
2
>>> b'foo: ?' in d
False
>>> l1 = b'foo: \xe4' # historical latin1 fallback
>>> l = tolocal(l1)
>>> l
'foo: ?'
>>> fromlocal(l) # magically in utf-8
'foo: \xc3\xa4'
zUTF-8s   UTF-8r   r   !   please check your locale settingshintN)
isasciistrdecoder,   r&   _sysstrrO   r   UnicodeDecodeErrorfallbackencodingLookupErrorr   Abortr
   bytestr)r   r6   rks       r   tolocalr`      sN   D !}}
	>!A8#*I6AHHWX.//#A&A>!! 	>
>HHW%567HHWX.	:!233'?* 1155% >HHWi0xx 19==>	>  
kkQ&J
 	

sr   B A
B =
B 
EA(D;E<E" >DEE" ;EEE" EEE" "
F,)FFc                $   [        U [        5      (       a  U R                  $ [        U 5      (       a  U $  U R	                  [        [        5      [        [        5      5      nUR                  S5      $ ! [         a]  nU [        SUR                  S-
  5      UR                  S-    n[        R                  " SU[        R                  " U5      4-  5      eSnAf[          a.  n[        R                  " [        R                  " U5      SS9eSnAff = f)ap  
Convert a string from the local character encoding to UTF-8

We attempt to decode strings using the encoding mode set by
HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
characters will cause an error message. Other modes include
'replace', which replaces unknown characters with a special
Unicode character, and 'ignore', which drops the character.
r   r   
   s   decoding near '%s': %s!NrS   rT   )r%   r   r4   rV   rW   rX   r,   r.   r&   rY   maxstartr   r\   r
   r]   r[   )r   r6   instsubr_   s        r   	fromlocalrg      s     !Xww!}}
HHWX&(=>xx   
AtzzB'$**r/:kk&#x/?/?/E)FF
 	
  
kkQ&J
 	

s$   <A2 2
D<ACD!)D

Dc                6    [        U R                  S5      5      $ )z;Convert a unicode string to a byte string of local encodingr   )r`   r&   )r6   s    r   
unitolocalri      s    188G$%%r   c                6    [        U 5      R                  S5      $ )z;Convert a byte string of local encoding to a unicode stringr   )rg   rW   r   s    r   unifromlocalrl      s    Q<w''r   c                   ^  U 4S jnU$ )zZCreate a proxy method that forwards __unicode__() and __str__() of
Python 3 to __bytes__()c                &   > [        T" U 5      5      $ r2   )rl   )obj	bytesfuncs    r   unifuncunimethod.<locals>.unifunc  s    IcN++r   rQ   )rp   rq   s   ` r   	unimethodrs     s    , Nr   c                    [        U 5      $ ! [         a     Of = f [        U [        5      (       a  U R                  R                  S5      nO,U R                  [        [        5      [        [        5      5      nUR                  5       nX:X  a  U $ UR                  [        [        5      5      $ ! [         a    U R                  5       s $ [         a.  n[        R                  " [        R                   " U5      SS9eSnAff = f)9best-effort encoding-aware case-folding of local string sr   rS   rT   N)
asciilowerrY   r%   r   r4   rW   rX   r,   r.   lowerr&   UnicodeErrorr[   r   r\   r
   r]   )r   r6   lur_   s       r   rw   rw     s    !} 
a""w'A*GL,ABAWWY7Hyy*++ wwy 
kkQ&J
 	

s0   
 
A3B0 B0 0D	D)C>>Dc                P     [        U 5      $ ! [         a    [        U 5      s $ f = f)ru   )
asciiupperrY   upperfallbackrk   s    r   upperr}   -  s+     !}  Q s   
 %%c                    [        U [        5      (       a  U R                  R                  S5      nO,U R                  [	        [
        5      [	        [        5      5      nUR                  5       nX:X  a  U $ UR                  [	        [
        5      5      $ ! [         a    U R                  5       s $ [         a.  n[        R                  " [        R                  " U5      SS9eS nAff = f)Nr   rS   rT   )r%   r   r4   rW   rX   r,   r.   r}   r&   rx   r[   r   r\   r
   r]   )r   r6   uur_   s       r   r|   r|   5  s    
a""w'A*GL,ABAWWY7Hyy*++ wwy 
kkQ&J
 	

s$   A3B 6B C'0	C'9)C""C'c                  0   ^  \ rS rSrSrSU 4S jjrSrU =r$ )WindowsEnvironiM  zE`os.environ` normalizes environment variables to uppercase on windowsc                6   > [         TU ]  [        U5      U5      $ r2   )r:   getr}   )r'   r(   r)   r<   s      r   r   WindowsEnviron.getP  s    w{5:w77r   rQ   r2   )rD   rE   rF   rG   rH   r   rK   rL   rM   s   @r   r   r   M  s    W8 8r   r   s   ^[a-z]:c                     [         R                  " 5       n [         R                  R                  U 5      n [	        U 5      n [
        R                  U 5      (       a  U SS R                  5       U SS  -   n U $ )Nr   r   )osgetcwdpathrealpath
strtolocalDRIVE_REmatchr}   )cwds    r   r   r   l  s[    iikggs#o>>#a(.."SW,C
r   s   HGENCODINGAMBIGUOUSs   narrows   wides   WFAs   WFc                R    [        U R                  [        [        5      S5      5      $ )zCFind the column width of a string for display in the local encodingr   )	ucolwidthrW   rX   r,   rk   s    r   colwidthr     s    QXXgh/;<<r   c                    [        [        SS5      nUb:  [        U  Vs/ s H#  o!" U5      [        ;   =(       a    S=(       d    SPM%     sn5      $ [	        U 5      $ s  snf )z5Find the column width of a Unicode string for displayeast_asian_widthN   r   )getattrunicodedatasum_widelen)deawr    s      r   r   r     sS    
+14
8C
:1CFeO).Q.:;;q6M ;s   *Ac                    [        X-   [        U 5      5       H  nXU n[        U5      U:X  d  M  Us  $    [        S5      e)zKUse colwidth to find a c-column substring of s starting at byte
index startzsubstring not found)ranger   r   
ValueError)r   rd   r    xts        r   getcolsr     sC     59c!f%AJA;!H & *
++r   c                    U R                  [        [        5      5      n[        U5      U::  a  U $ U[	        U5      -  nUS::  a  USU[	        U5      -    $ [        U5      nU(       a  UR                  5         Sn[        U5       H  u  pxU[        U5      -  nXa:  d  M    O   USW nU(       a  UR                  5         SR                  U5      R                  [        [        5      5      nU(       a  X$-   $ XB-   $ ! [         aV    [	        U 5      U::  a  U s $ U[	        U5      -  nUS::  a  USU[	        U5      -    s $ U(       a
  X U* S -   s $ U SU U-   s $ f = f)u   Trim string 's' to at most 'width' columns (including 'ellipsis').

If 'leftside' is True, left side of string 's' is trimmed.
'ellipsis' is always placed at trimmed side.

>>> from .node import bin
>>> def bprint(s):
...     print(pycompat.sysstr(s))
>>> ellipsis = b'+++'
>>> from . import encoding
>>> encoding.encoding = b'utf-8'
>>> t = b'1234567890'
>>> bprint(trim(t, 12, ellipsis=ellipsis))
1234567890
>>> bprint(trim(t, 10, ellipsis=ellipsis))
1234567890
>>> bprint(trim(t, 8, ellipsis=ellipsis))
12345+++
>>> bprint(trim(t, 8, ellipsis=ellipsis, leftside=True))
+++67890
>>> bprint(trim(t, 8))
12345678
>>> bprint(trim(t, 8, leftside=True))
34567890
>>> bprint(trim(t, 3, ellipsis=ellipsis))
+++
>>> bprint(trim(t, 1, ellipsis=ellipsis))
+
>>> u = u'あいうえお' # 2 x 5 = 10 columns
>>> t = u.encode(pycompat.sysstr(encoding.encoding))
>>> bprint(trim(t, 12, ellipsis=ellipsis))
ããããã
>>> bprint(trim(t, 10, ellipsis=ellipsis))
ããããã
>>> bprint(trim(t, 8, ellipsis=ellipsis))
ãã+++
>>> bprint(trim(t, 8, ellipsis=ellipsis, leftside=True))
+++ãã
>>> bprint(trim(t, 5))
ãã
>>> bprint(trim(t, 5, leftside=True))
ãã
>>> bprint(trim(t, 4, ellipsis=ellipsis))
+++
>>> bprint(trim(t, 4, ellipsis=ellipsis, leftside=True))
+++
>>> t = bin(b'112233445566778899aa') # invalid byte sequence
>>> bprint(trim(t, 12, ellipsis=ellipsis))
"3DUfwª
>>> bprint(trim(t, 10, ellipsis=ellipsis))
"3DUfwª
>>> bprint(trim(t, 8, ellipsis=ellipsis))
"3DU+++
>>> bprint(trim(t, 8, ellipsis=ellipsis, leftside=True))
+++fwª
>>> bprint(trim(t, 8))
"3DUfw
>>> bprint(trim(t, 8, leftside=True))
3DUfwª
>>> bprint(trim(t, 3, ellipsis=ellipsis))
+++
>>> bprint(trim(t, 1, ellipsis=ellipsis))
+
r   N )rW   rX   r,   rY   r   r   listreverse	enumeratejoinr&   )	r   widthellipsisleftsider6   charswidth_so_farr   r    s	            r   trimr     sW   L
$HHWX&' |u	S]Ez/%#h-/00GEL% 	!$ ! "1IE
gh/0A|<?  $q6U?HXA:3ec(m344j((%y8##$s#   C; ;E%E?EEEc                  $    \ rS rSrSrSrSrSrSrg)normcasespecsi   a`  what a platform's normcase does to ASCII strings

This is specified per platform, and should be consistent with what normcase
on that platform actually does.

lower: normcase lowercases ASCII strings
upper: normcase uppercases ASCII strings
other: the fallback function should always be called

This should be kept in sync with normcase_spec in util.h.r   r   rQ   N)	rD   rE   rF   rG   rH   rw   r}   otherrK   rQ   r   r   r   r      s    	A EEEr   r   c                |    [        U 5      n [        X!5      $ ! [         a     Of = f[        R                  " X!5      $ )a  returns a string suitable for JSON

JSON is problematic for us because it doesn't support non-Unicode
bytes. To deal with this, we take the following approach:

- localstr/safelocalstr objects are converted back to UTF-8
- valid UTF-8/ASCII strings are passed as-is
- other strings are converted to UTF-8b surrogate encoding
- apply JSON-specified string escaping

(escapes are doubled in these tests)

>>> jsonescape(b'this is a test')
'this is a test'
>>> jsonescape(b'escape characters: \0 \x0b \x7f')
'escape characters: \\u0000 \\u000b \\u007f'
>>> jsonescape(b'escape characters: \b \t \n \f \r \" \\')
'escape characters: \\b \\t \\n \\f \\r \\" \\\\'
>>> jsonescape(b'a weird byte: \xdd')
'a weird byte: \xed\xb3\x9d'
>>> jsonescape(b'utf-8: caf\xc3\xa9')
'utf-8: caf\xc3\xa9'
>>> jsonescape(b'')
''

If paranoid, non-ascii and common troublesome characters are also escaped.
This is suitable for web output.

>>> s = b'escape characters: \0 \x0b \x7f'
>>> assert jsonescape(s) == jsonescape(s, paranoid=True)
>>> s = b'escape characters: \b \t \n \f \r \" \\'
>>> assert jsonescape(s) == jsonescape(s, paranoid=True)
>>> jsonescape(b'escape boundary: \x7e \x7f \xc2\x80', paranoid=True)
'escape boundary: ~ \\u007f \\u0080'
>>> jsonescape(b'a weird byte: \xdd', paranoid=True)
'a weird byte: \\udcdd'
>>> jsonescape(b'utf-8: caf\xc3\xa9', paranoid=True)
'utf-8: caf\\u00e9'
>>> jsonescape(b'non-BMP: \xf0\x9d\x84\x9e', paranoid=True)
'non-BMP: \\ud834\\udd1e'
>>> jsonescape(b'<foo@example.org>', paranoid=True)
'\\u003cfoo@example.org\\u003e'
)toutf8b_jsonescapeu8fastr   charencodepurejsonescapeu8fallback)r   paranoidu8charss      r   
jsonescaper     sA    Z ajG 33 ..wAAs   
 
%%surrogatepass)r   r   r   r   r   r   r   r   r   r   r   r   r   r         c                    [         [        XUS-    5      S-	     nU(       d  XUS-    $ XX-    nUR                  S[        5        U$ )zget the next full utf-8 character in the given string, starting at pos

Raises a UnicodeError if the given location does not start a valid
utf-8 character.
r   r   r   )_utf8lenordrW   _utf8strict)r   posr7   r    s       r   getutf8charr   M  sR     	QS1W%&!+,AsQw	AHHWk"Hr   c                   [        U [        5      (       a  U R                  $ [        U [        5      (       a  [	        U 5      $ [        U 5      (       a  U $ SU ;  a   U R                  S[        5        U $ [        R                  " U 5      n [        5       nSn[        U 5      nX#:  ai   [        X5      nSUs=::  a  S::  a6  O  O3[        S[        X   5      -   5      R!                  S[        5      nUS-  nOU[        U5      -  n X-  nX#:  a  Mi  [#        U5      $ ! [         a     Nf = f! [         a5    [        S[        X   5      -   5      R!                  S[        5      nUS-  n Ndf = f)a  convert a local, possibly-binary string into UTF-8b

This is intended as a generic method to preserve data when working
with schemes like JSON and XML that have no provision for
arbitrary byte strings. As Mercurial often doesn't know
what encoding data is in, we use so-called UTF-8b.

If a string is already valid UTF-8 (or ASCII), it passes unmodified.
Otherwise, unsupported bytes are mapped to UTF-16 surrogate range,
uDC00-uDCFF.

Principles of operation:

- ASCII and UTF-8 data successfully round-trips and is understood
  by Unicode-oriented clients
- filenames and file contents in arbitrary other encodings can have
  be round-tripped or recovered by clueful clients
- local strings that have a cached known UTF-8 encoding (aka
  localstr) get sent as UTF-8 so Unicode-oriented clients get the
  Unicode data they want
- non-lossy local strings (aka safelocalstr) get sent as UTF-8 as well
- because we must preserve UTF-8 bytestring in places such as
  filenames, metadata can't be roundtripped without help

(Note: "UTF-8b" often refers to decoding a mix of valid UTF-8 and
arbitrary bytes into an internal Unicode format that can be
re-encoded back into the original. Here we are exposing the
internal surrogate encoding as a UTF-8 string.)
   r   r         i   r   )r%   r   r4   rO   rg   rV   rW   r   rY   r
   r]   	bytearrayr   r   unichrr   r&   r   r   r^   r   r7   r    s        r   r   r   _  sM   > !X ww	A|	$	$ |	Aa	HHWk*H 	AA
CAA
'
	A#A!666CK/077Mqs1v 	
 ' 8O) " 		  " 	vAF+,33G[IA1HC	s+   D &AD) 4D) 
D&%D&)<E('E(c                   [        U 5      (       a  U $ SU ;  a  U $ [        R                  " U 5      n [        5       nSn[	        U 5      nX#:  aj  [        X5      nU[	        U5      -  nSUs=::  a  S::  a9  O  O6[        R                  " [        UR                  S[        5      5      S-  5      nX-  nX#:  a  Mj  [        U5      $ )a  Given a UTF-8b string, return a local, possibly-binary string.

return the original binary string. This
is a round-trip process for strings like filenames, but metadata
that's was passed through tolocal will remain in UTF-8.

>>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x
>>> m = b"\xc3\xa9\x99abcd"
>>> toutf8b(m)
'\xc3\xa9\xed\xb2\x99abcd'
>>> roundtrip(m)
True
>>> roundtrip(b"\xc2\xc2\x80")
True
>>> roundtrip(b"\xef\xbf\xbd")
True
>>> roundtrip(b"\xef\xef\xbf\xbd")
True
>>> roundtrip(b"\xf1\x80\x80\x80\x80")
True
r   r   r   r   r      )rV   r
   r]   r   r   r   bytechrr   rW   r   r   r   s        r   	fromutf8br     s    . !}}a 	AA
CAA
's1va2?2  QXXg{%C!Dt!KLA	 ' 8Or   )r   r   rB   r   r2   )r6   strrB   r   )r   r   rB   r   )rp   zCallable[[Any], bytes]rB   zCallable[[Any], str])r   r   rB   r   )r   r   rB   int)r   r   rB   r   )r   r   rd   r   r    r   rB   r   )r   F)
r   r   r   r   r   r   r   boolrB   r   )F)r   r   r   r   rB   r   )r   r   r   r   rB   r   )X
__future__r   localer   rerI   r   r   r   r   r   r   r	   r
   
interfacesr   intmodpurer   r   r   	importmod__annotations__rV   rv   r{   jsonescapeu8fastr   sysstrrX   chrr   splitr   r&   r   allr!   supports_bytes_environ_nativeenvironenvironbenvironsysplatformr+   r<   r   items_encodingrewrites	iswindowsr,   getpreferredencodingErrorr.   rZ   r   r   rO   r`   rg   ri   rl   rs   r   strfromlocal	strmethodrw   r}   r|   dictr   r_   r*   compiler   r   getcwdbr   r   r   r   r   r   r   r   r   r   r   r   )r   r_   r*   s   000r   <module>r      s   #  	 	     * .\4
"("2"2<"@
 @""
""
""
// 
//	**/%'22 3q":g&2 
=W
=== ==  **kkGz)	3 !' JJ$$&&DA 	
188G,,&G   $,j!% {{=#.H..077@LH$((8< kk"3Y?e ?   u  &5 A
H
>&
(
 
	
0 
&  	8T 	8 !"

  "1.5ahhw6G.H)*+ # ::j!  ZZF 	KK&	2g= 	=
, 	ggg g 	g
 gT "2Bn ;$AH-gZ* || Hs   *(M=-N'AN NN