# Edge cases

\\
\x00
\t
\n
\r
\x1f
\x20
!
~
\x7f
\x80
\x9f
\xa0
\xfe
\xff
\ufeff
\ufffe
\U0010ffff

# ASCII Punctuation

~`!@#$%^&*()-_=+[{}]\\|;:'",<.>/?

# Kevin

Kevin
\u212aevin
\u039aevin
\u1e32evin
\u1e34evin
K\u0301evin
K\u0301\u0301evin
K\ua717evin
\u24c0evin
\u2c69evin
\ua740evin
\ua742evin
\ua744evin
\ua7a2evin
\uff2bevin
\U000e004bevin
\U0001f11aevin
\U0001f13aevin
\U0001f15aevin
\U0001f17aevin

# From RFC 7564, 7613

StPeter
Juliet
Juliet@Example.COM
Fussball
Fu\xdfball
\u03c0
\u03a3
\u03c3
\u03c2
ju1iet
\u13da\u13a2\u13b5\u13ac\u13a2\u13ac\u13d2
Foo
foo
Foo Bar
foo bar
\u03a3
\u03c3
\u03c2
\u265a
Richard \u2163

# From RFC 6452: Derived property values that changed in Unicode 6.0.

\u0cf1
\u0cf2
\u19da

# "dotless i" in several Turkic languages (SpecialCasing.txt)
# (It looks like Python 3 ignores locale-specific special casing rules.)

i\u0130\u0131I
I\u0307
\u0131\u0307

# Miscellaneous

A\ua717
\xe9
e\u0301\u0301\u0301
e\u0301
o
\u03bf
\u043e
\ufec9
\ufeca
\ufecb
\ufecc
\uff76
\u30ab

\u3300
\u30a2\u30d1\u30fc\u30c8

i\u2079
\xbc

\u01c6
\ufe37

\u210c
\u210d

\u2e2f

# These become ASCII after NFC.

\u037e
\u1fef
\u212a


# Full-width characters

\uff01
\uff2a\uff55\uff4c\uff49\uff45\uff54
\uff04
\ufe69

Rs
\u20a8

# Regional Indicator Symbols

\U0001f1fa\U0001f1f8


# Look-alikes

K
\u212a
\u24c0
\u2460

\u03a9
\u2126

\xc7
C\u0327

q\u0307\u0323
q\u0323\u0307

\uac00
\u1100\u1161

\u212b
A\u030a
s\u0323\u0307
\ufb01

\uac01
\u1100\u1161\u11a8

\U0001f47e
\U0001f49a

# Zalgo Text

Z\u0351\u036b\u0343\u036a\u0302\u036b\u033d\u0334\u0319\u0324\u031e\u0349\u035a\u032f\u031e\u0320\u034d

Z\u0351\u036b\u0343\u036a\u0302\u036b\u033d\u0334\u0319\u0324\u031e\u0349\u035a\u032f\u031e\u0320\u034dA\u036b\u0357\u0334\u0362\u0335\u031c\u0330\u0354L\u0368\u0367\u0369\u0358\u0320G\u0311\u0357\u030e\u0305\u035b\u0341\u0334\u033b\u0348\u034d\u0354\u0339O\u0342\u030c\u030c\u0358\u0328\u0335\u0339\u033b\u031d\u0333!\u033f\u030b\u0365\u0365\u0302\u0363\u0310\u0301\u0301\u035e\u035c\u0356\u032c\u0330\u0319\u0317

Z\u0351\u036b\u0343\u036a\u0302\u036b\u033d\u034f\u0334\u0319\u0324\u031e\u0349\u035a\u032f\u031e\u0320\u034dA\u036b\u0357\u0334\u0362\u0335\u031c\u0330\u0354L\u0368\u0367\u0369\u0358\u0320G\u0311\u0357\u030e\u0305\u035b\u0341\u0334\u033b\u0348\u034d\u0354\u0339O\u0342\u030c\u030c\u0358\u0328\u0335\u0339\u033b\u031d\u0333!\u033f\u030b\u0365\u0365\u0302\u0363\u0310\u0301\u0301\u035e\u035c\u0356\u032c\u0330\u0319\u0317

Z\u0315\u0313\u030c\u035b\u0350\u0306\u0311\u0307\u0342\u034c\u032d\u0322\u0326\u0319\u0318\u0318\u0324\u0347\u0320\u0334A\u033f\u0311\u0309\u0308\u0302\u0308\u034c\u035c\u031d\u0327\u031d\u0356\u032c\u0329\u032f\u0334L\u031b\u034a\u0314\u0307\u0352\u0352\u0309\u0327\u032c\u0316\u0330\u032b\u0362\u033c\u0329\u035f\u0356G\u033d\u0307\u0360\u0350\u0313\u030c\u0342\u035d\u0313\u031d\u031e\u033c\u031d\u034d\u0336O\u033f\u0308\u035b\u0306\u030f\u034b\u0342\u032b\u032f\u0320\u0348\u032f\u0356\u033a\u0321


# Allowed "has_compat" characters (due to NFC)

\u037e

\u0340\u0341\u0343\u0344\u0374\u037e\u0958\u0959\u095a\u095b\u095c\u095d\u095e\u095f\u09dc\u09dd\u09df\u0a33\u0a36\u0a59\u0a5a\u0a5b\u0a5e\u0b5c\u0b5d\u0f43\u0f4d\u0f52\u0f57\u0f5c\u0f69\u0f73\u0f75\u0f76\u0f78\u0f81\u0f93\u0f9d\u0fa2\u0fa7\u0fac\u0fb9\u1f71\u1f73\u1f75\u1f77\u1f79\u1f7b\u1f7d\u1fbb\u1fbe\u1fc9\u1fcb\u1fd3\u1fdb\u1fe3\u1feb\u1fef\u1ff9\u1ffb\u2126\u212a\u212b

# "mapped to nothing" https://tools.ietf.org/html/rfc3454#appendix-B.1

\xad
\u034f
\u1806
\u180b
\u180c
\u180d
\u200b
\u200c
\u200d
\u2060
\ufe00
\ufe01
\ufe02
\ufe03
\ufe04
\ufe05
\ufe06
\ufe07
\ufe08
\ufe09
\ufe0a
\ufe0b
\ufe0c
\ufe0d
\ufe0e
\ufe0f
\ufeff

# Spaces

Juliet\x20Capulet
\x20Juliet
Juliet\x20
Juliet\x20\x20Capulet

# Quotes

"Juliet"
'Juliet'
'Juliet\x20'
'\x20Juliet'
'Juliet\x20Capulet'
'Juliet\x20\x20Capulet'

# Bidirectional text

\u05d0\u05d1
\u05d0\u20d6\u05d1
# This test fails on systems before Unicode 7.0; U+1AB6 was introduced in 7.0.
\u05d0\u1ab6\u05d1
\u05d0\u05d1+
+\u05d0\u05d1
A\u05d0\u05d1
\u05d0\u05d1A
\u05d0A\u05d1

# When rendered RTL, the parentheses are mirrored but the solidus is not (on my Mac).
\u05d0(/%\\))\u05d1

# Only apply bidi rule for strings with RTL codepoints.

Juliet+
+Juliet
Juliet\u05d1

# Past normalization instability points (http://www.unicode.org/review/pr-29.html)

\u0B47\u0300\u0B3E\u0323
\u1100\u0300\u1161\u0323

# Not idempotent under Nickname (case preserved)?

\xa8
\xaf
\xb4
\xb8
\u02d8
\u02d9
\u02da
\u02db
\u02dc
\u02dd
\u037a
\u0384
\u0385
\u1fbd
\u1fbf
\u1fc0
\u1fc1
\u1fcd
\u1fce
\u1fcf
\u1fdd
\u1fde
\u1fdf
\u1fed
\u1fee
\u1ffd
\u1ffe
\u2017
\u203e
\u309b
\u309c
\ufc5e
\ufc5f
\ufc60
\ufc61
\ufc62
\ufc63
\ufe49
\ufe4a
\ufe4b
\ufe4c
\ufe70
\ufe72
\ufe74
\ufe76
\ufe78
\ufe7a
\ufe7c
\ufe7e
\uffe3

# middle_dot
# https://www.ietf.org/mail-archive/web/precis/current/msg01169.html

\u00b7
\u00b7l
l\u00b7
rul\u00b7lz
ruL\u00b7Lz
rul\u00b7ze
ru\u00b7lze

# Exceptions

# PVALID -- Would otherwise have been DISALLOWED
\u00DF
\u03C2
\u06FD
\u06FE
\u0F0B
\u3007

# CONTEXTO -- Would otherwise have been DISALLOWED
\u00B7
\u0375
\u05F3
\u05F4
\u30FB

# CONTEXTO -- Would otherwise have been PVALID
# These violate the bidi rule.
\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669
\u06F0\u06F1\u06F2\u06F3\u06F4\u06F5\u06F6\u06F7\u06F8\u06F9

# These are okay with the bidi rule.
\u0623\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669
\u0623\u06F0\u06F1\u06F2\u06F3\u06F4\u06F5\u06F6\u06F7\u06F8\u06F9

# These mix Arabic-Indic digits and Extended Arabic-Indic digits.
\u0623\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669\u06F0
\u0623\u06F0\u06F1\u06F2\u06F3\u06F4\u06F5\u06F6\u06F7\u06F8\u06F9\u0660

# DISALLOWED -- Would otherwise have been PVALID
\u0640
\u07FA
\u302E
\u302F
\u3031
\u3032
\u3033
\u3034
\u3035
\u303B

# Other

\u0600
\U000F0000
\U000E01F0
\uD800
\uD8FF

# Discussed on mailing list.

\u03A8\u03D3\u03A7\u0397

# From idempotent_nfc_check.py

\u01f0j\u030c
\u0390\u03b9\u0308\u0301
\u03b0\u03c5\u0308\u0301
\u1e96h\u0331
\u1e97t\u0308
\u1e98w\u030a
\u1e99y\u030a
\u1f50\u03c5\u0313
\u1f52\u03c5\u0313\u0300
\u1f54\u03c5\u0313\u0301
\u1f56\u03c5\u0313\u0342
\u1fb6\u03b1\u0342
\u1fb7\u03b1\u0342\u0345
\u1fc6\u03b7\u0342
\u1fc7\u03b7\u0342\u0345
\u1fd2\u03b9\u0308\u0300
\u1fd3\u03b9\u0308\u0301
\u1fd6\u03b9\u0342
\u1fd7\u03b9\u0308\u0342
\u1fe2\u03c5\u0308\u0300
\u1fe3\u03c5\u0308\u0301
\u1fe4\u03c1\u0313
\u1fe6\u03c5\u0342
\u1fe7\u03c5\u0308\u0342
\u1ff6\u03c9\u0342
\u1ff7\u03c9\u0342\u0345

# Samples from http://unicode.org/faq/idn.html

fa\u00DF
T\u00DCRKIYE
t\u00FCrk\u0131ye
\u0392\u03CC\u03BB\u03BF\u03C2
\u0392\u03CC\u03BB\u03BF\u03A3

\u03A3\u03A3

# Between Unicode 10.0 and 11.0, U+111C9 (SHARADA SANDHI MARK, decimal 70089)
# changed from FREE_PVAL to PVALID.

\U000111c9

