x3x3x3x_5h3ll
— 53cur3 — 5h3ll_1d —
Linux vps-10654784.cedaps.org.br 3.10.0-1160.119.1.el7.x86_64 #1 SMP Tue Jun 4 14:43:51 UTC 2024 x86_64
  INFO SERVER : Apache PHP : 7.4.33
/lib64/perl5/Unicode/
162.240.100.168

 
[ NAME ] [ SIZE ] [ PERM ] [ DATE ] [ ACTN ]
+FILE +DIR
Collate dir drwxr-xr-x 2024-07-07 21:56 R D
Collate.pm 55.094 KB -rw-r--r-- 2023-10-25 12:41 R E G D
Normalize.pm 17.618 KB -rw-r--r-- 2023-10-25 12:41 R E G D
REQUEST EXIT
package Unicode::Normalize;

# Refuse to load when pack('U', ...) does not yield the expected character.
BEGIN {
    die "Unicode::Normalize cannot stringify a Unicode code point\n"
        if "A" ne pack('U', 0x41);
}

use 5.006;
use strict;
use warnings;
use Carp;

no warnings 'utf8';

our $VERSION = '1.14';
our $PACKAGE = __PACKAGE__;

# Names exported by default.
our @EXPORT = qw( NFC NFD NFKC NFKD );

# Names exported on request.
our @EXPORT_OK = qw(
    normalize decompose reorder compose
    checkNFD checkNFKD checkNFC checkNFKC check
    getCanon getCompat getComposite getCombinClass
    isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex
    isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE
    FCD checkFCD FCC checkFCC composeContiguous splitOnLastStarter
    normalize_partial NFC_partial NFD_partial NFKC_partial NFKD_partial
);

# Export groups selectable with the ':tag' syntax.
our %EXPORT_TAGS = (
    all       => [ @EXPORT, @EXPORT_OK ],
    normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ],
    check     => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ],
    fast      => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ],
);

##
## utilities for tests
##

# Build a string from a list of code points.
sub pack_U {
    return pack('U*', @_);
}

# Return the list of code points of the first argument; the appended
# empty pack('U*') is concatenated before unpacking.
sub unpack_U {
    my $text = shift(@_);
    return unpack('U*', $text . pack('U*'));
}

require Exporter;

##### The above part is common to XS and PP #####

our @ISA = qw(Exporter DynaLoader);
require DynaLoader;
Unicode::Normalize->bootstrap($VERSION);

##### The below part is common to XS and PP #####

##
## normalize
##

# FCD($string): hand the string back untouched when checkFCD() reports
# true for it; otherwise return NFD($string).
sub FCD ($) {
    my ($text) = @_;
    return $text if checkFCD($text);
    return NFD($text);
}

# Form name => normalization routine, used by normalize() and
# normalize_partial().
our %formNorm = (
    NFC  => \&NFC,
    C    => \&NFC,
    NFD  => \&NFD,
    D    => \&NFD,
    NFKC => \&NFKC,
    KC   => \&NFKC,
    NFKD => \&NFKD,
    KD   => \&NFKD,
    FCD  => \&FCD,
    FCC  => \&FCC,
);

# normalize($form, $string): dispatch through %formNorm.
# Croaks when $form is not a key of %formNorm.
sub normalize($$) {
    my ($form, $str) = @_;

    croak($PACKAGE."::normalize: invalid form name: $form")
        unless exists $formNorm{$form};

    return $formNorm{$form}->($str);
}

##
## partial
##

# normalize_partial($form, $unprocessed): normalize the second argument,
# split the result with splitOnLastStarter(), return the first part and
# store the second part back into the caller's variable (via the $_[1]
# alias). Croaks when the form name is not a key of %formNorm.
sub normalize_partial ($$) {
    unless (exists $formNorm{$_[0]}) {
        croak($PACKAGE."::normalize_partial: invalid form name: $_[0]");
    }

    my $normalized = normalize($_[0], $_[1]);
    my ($processed, $unprocessed) = splitOnLastStarter($normalized);

    # Deliberate write through @_: the caller's variable is updated.
    $_[1] = $unprocessed;
    return $processed;
}

# Fixed-form shortcuts for normalize_partial(); $_[0] is passed on as is
# so that the caller's variable is still the one being modified.
sub NFD_partial ($) {
    return normalize_partial('NFD', $_[0])
}

sub NFC_partial ($) {
    return normalize_partial('NFC', $_[0])
}

sub NFKD_partial($) {
    return normalize_partial('NFKD',$_[0])
}

sub NFKC_partial($) {
    return normalize_partial('NFKC',$_[0])
}

##
## check
##

# Form name => quick-check routine, used by check().
our %formCheck = (
    NFC  => \&checkNFC,
    C    => \&checkNFC,
    NFD  => \&checkNFD,
    D    => \&checkNFD,
    NFKC => \&checkNFKC,
    KC   => \&checkNFKC,
    NFKD => \&checkNFKD,
    KD   => \&checkNFKD,
    FCD  => \&checkFCD,
    FCC  => \&checkFCC,
);

# check($form, $string): dispatch through %formCheck.
# Croaks when $form is not a key of %formCheck.
sub check($$) {
    my ($form, $str) = @_;

    croak($PACKAGE."::check: invalid form name: $form")
        unless exists $formCheck{$form};

    return $formCheck{$form}->($str);
}

1;
__END__

=head1 NAME

Unicode::Normalize - Unicode Normalization Forms

=head1 SYNOPSIS

(1) using function names exported by default:

  use Unicode::Normalize;

  $NFD_string  = NFD($string);  # Normalization Form D
  $NFC_string  = NFC($string);  # Normalization Form C
  $NFKD_string = NFKD($string); # Normalization Form KD
  $NFKC_string = NFKC($string); # Normalization Form KC

(2) using function names exported on request:

  use Unicode::Normalize 'normalize';

  $NFD_string  = normalize('D',  $string);  # Normalization Form D
  $NFC_string  = normalize('C',  $string);  # Normalization Form C
  $NFKD_string = normalize('KD', $string);  # Normalization Form KD
  $NFKC_string = normalize('KC', $string);  # Normalization Form KC

=head1 DESCRIPTION

Parameters:

C<$string>
is used as a string under character semantics (see F<perlunicode>). C<$code_point> should be an unsigned integer representing a Unicode code point. Note: Between XSUB and pure Perl, there is an incompatibility about the interpretation of C<$code_point> as a decimal number. XSUB converts C<$code_point> to an unsigned integer, but pure Perl does not. Do not use a floating point nor a negative sign in C<$code_point>. =head2 Normalization Forms =over 4 =item C<$NFD_string = NFD($string)> It returns the Normalization Form D (formed by canonical decomposition). =item C<$NFC_string = NFC($string)> It returns the Normalization Form C (formed by canonical decomposition followed by canonical composition). =item C<$NFKD_string = NFKD($string)> It returns the Normalization Form KD (formed by compatibility decomposition). =item C<$NFKC_string = NFKC($string)> It returns the Normalization Form KC (formed by compatibility decomposition followed by B<canonical> composition). =item C<$FCD_string = FCD($string)> If the given string is in FCD ("Fast C or D" form; cf. UTN #5), it returns the string without modification; otherwise it returns an FCD string. Note: FCD is not always unique, then plural forms may be equivalent to each other. C<FCD()> will return one of these equivalent forms. =item C<$FCC_string = FCC($string)> It returns the FCC form ("Fast C Contiguous"; cf. UTN #5). Note: FCC is unique, as well as four normalization forms (NF*). =item C<$normalized_string = normalize($form_name, $string)> It returns the normalization form of C<$form_name>. As C<$form_name>, one of the following names must be given. 
'C' or 'NFC' for Normalization Form C (UAX #15) 'D' or 'NFD' for Normalization Form D (UAX #15) 'KC' or 'NFKC' for Normalization Form KC (UAX #15) 'KD' or 'NFKD' for Normalization Form KD (UAX #15) 'FCD' for "Fast C or D" Form (UTN #5) 'FCC' for "Fast C Contiguous" (UTN #5) =back =head2 Decomposition and Composition =over 4 =item C<$decomposed_string = decompose($string [, $useCompatMapping])> It returns the concatenation of the decomposition of each character in the string. If the second parameter (a boolean) is omitted or false, the decomposition is canonical decomposition; if the second parameter (a boolean) is true, the decomposition is compatibility decomposition. The string returned is not always in NFD/NFKD. Reordering may be required. $NFD_string = reorder(decompose($string)); # eq. to NFD() $NFKD_string = reorder(decompose($string, TRUE)); # eq. to NFKD() =item C<$reordered_string = reorder($string)> It returns the result of reordering the combining characters according to Canonical Ordering Behavior. For example, when you have a list of NFD/NFKD strings, you can get the concatenated NFD/NFKD string from them, by saying $concat_NFD = reorder(join '', @NFD_strings); $concat_NFKD = reorder(join '', @NFKD_strings); =item C<$composed_string = compose($string)> It returns the result of canonical composition without applying any decomposition. For example, when you have a NFD/NFKD string, you can get its NFC/NFKC string, by saying $NFC_string = compose($NFD_string); $NFKC_string = compose($NFKD_string); =item C<($processed, $unprocessed) = splitOnLastStarter($normalized)> It returns two strings: the first one, C<$processed>, is a part before the last starter, and the second one, C<$unprocessed> is another part after the first part. A starter is a character having a combining class of zero (see UAX #15). 
Note that C<$processed> may be empty (when C<$normalized> contains no starter or starts with the last starter), and then C<$unprocessed> should be equal to the entire C<$normalized>. When you have a C<$normalized> string and an C<$unnormalized> string following it, a simple concatenation is wrong: $concat = $normalized . normalize($form, $unnormalized); # wrong! Instead of it, do like this: ($processed, $unprocessed) = splitOnLastStarter($normalized); $concat = $processed . normalize($form, $unprocessed.$unnormalized); C<splitOnLastStarter()> should be called with a pre-normalized parameter C<$normalized>, that is in the same form as C<$form> you want. If you have an array of C<@string> that should be concatenated and then normalized, you can do like this: my $result = ""; my $unproc = ""; foreach my $str (@string) { $unproc .= $str; my $n = normalize($form, $unproc); my($p, $u) = splitOnLastStarter($n); $result .= $p; $unproc = $u; } $result .= $unproc; # instead of normalize($form, join('', @string)) =item C<$processed = normalize_partial($form, $unprocessed)> A wrapper for the combination of C<normalize()> and C<splitOnLastStarter()>. Note that C<$unprocessed> will be modified as a side-effect. If you have an array of C<@string> that should be concatenated and then normalized, you can do like this: my $result = ""; my $unproc = ""; foreach my $str (@string) { $unproc .= $str; $result .= normalize_partial($form, $unproc); } $result .= $unproc; # instead of normalize($form, join('', @string)) =item C<$processed = NFD_partial($unprocessed)> It does like C<normalize_partial('NFD', $unprocessed)>. Note that C<$unprocessed> will be modified as a side-effect. =item C<$processed = NFC_partial($unprocessed)> It does like C<normalize_partial('NFC', $unprocessed)>. Note that C<$unprocessed> will be modified as a side-effect. =item C<$processed = NFKD_partial($unprocessed)> It does like C<normalize_partial('NFKD', $unprocessed)>. Note that C<$unprocessed> will be modified as a side-effect. =item C<$processed = NFKC_partial($unprocessed)> It does like C<normalize_partial('NFKC', $unprocessed)>. Note that C<$unprocessed> will be modified as a side-effect. 
=back =head2 Quick Check (see Annex 8, UAX #15; and F<DerivedNormalizationProps.txt>) The following functions check whether the string is in that normalization form. The result returned will be one of the following: YES The string is in that normalization form. NO The string is not in that normalization form. MAYBE Dubious. Maybe yes, maybe no. =over 4 =item C<$result = checkNFD($string)> It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. =item C<$result = checkNFC($string)> It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; C<undef> if C<MAYBE>. =item C<$result = checkNFKD($string)> It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. =item C<$result = checkNFKC($string)> It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; C<undef> if C<MAYBE>. =item C<$result = checkFCD($string)> It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. =item C<$result = checkFCC($string)> It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; C<undef> if C<MAYBE>. Note: If a string is not in FCD, it must not be in FCC. So C<checkFCC($not_FCD_string)> should return C<NO>. =item C<$result = check($form_name, $string)> It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; C<undef> if C<MAYBE>. As C<$form_name>, one of the following names must be given. 'C' or 'NFC' for Normalization Form C (UAX #15) 'D' or 'NFD' for Normalization Form D (UAX #15) 'KC' or 'NFKC' for Normalization Form KC (UAX #15) 'KD' or 'NFKD' for Normalization Form KD (UAX #15) 'FCD' for "Fast C or D" Form (UTN #5) 'FCC' for "Fast C Contiguous" (UTN #5) =back B<Note> In the cases of NFD, NFKD, and FCD, the answer must be either C<YES> or C<NO>. The answer C<MAYBE> may be returned in the cases of NFC, NFKC, and FCC. A C<MAYBE> string should contain at least one combining character or the like. For example, C<COMBINING ACUTE ACCENT> has the MAYBE_NFC/MAYBE_NFKC property. Both C<checkNFC("A\N{COMBINING ACUTE ACCENT}")> and C<checkNFC("B\N{COMBINING ACUTE ACCENT}")> will return C<MAYBE>. C<"A\N{COMBINING ACUTE ACCENT}"> is not in NFC (its NFC is C<"\N{LATIN CAPITAL LETTER A WITH ACUTE}">), while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC. If you want to check exactly, compare the string with its NFC/NFKC/FCC. 
if ($string eq NFC($string)) { # $string is exactly normalized in NFC; } else { # $string is not normalized in NFC; } if ($string eq NFKC($string)) { # $string is exactly normalized in NFKC; } else { # $string is not normalized in NFKC; } =head2 Character Data These functions are interface of character dat