From 8aea5020dc2bb472876cab6c468908b6c45e854d Mon Sep 17 00:00:00 2001 From: John Naylor Date: Fri, 15 May 2020 11:13:15 +0800 Subject: [PATCH v1 2/3] Use perfect hashing for NFC Unicode normalization quick check This makes the normalization check nearly 40% faster, while only adding 5kB to the binary size. The hash lookup reuses the existing array of bitfields used for binary search to get the quick check property and is generated as part of make update-unicode. Since the hash function is part of the repository, format it like pgindent would. NFKC quick check still uses binary search. --- .../generate-unicode_normprops_table.pl | 38 +- src/common/unicode_norm.c | 36 +- src/include/common/unicode_normprops_table.h | 344 ++++++++++++++++++ src/tools/PerfectHash.pm | 2 +- 4 files changed, 412 insertions(+), 8 deletions(-) diff --git a/src/common/unicode/generate-unicode_normprops_table.pl b/src/common/unicode/generate-unicode_normprops_table.pl index e8e5097c09..7f00cb0ae4 100644 --- a/src/common/unicode/generate-unicode_normprops_table.pl +++ b/src/common/unicode/generate-unicode_normprops_table.pl @@ -9,6 +9,10 @@ use strict; use warnings; +use FindBin; +use lib "$FindBin::RealBin/../../tools/"; +use PerfectHash; + my %data; print @@ -17,6 +21,8 @@ print print <) @@ -66,6 +79,7 @@ foreach my $prop (sort keys %data) "static const pg_unicode_normprops UnicodeNormProps_${prop}[] = {\n"; my %subdata = %{ $data{$prop} }; + my @cp_packed; foreach my $cp (sort { $a <=> $b } keys %subdata) { my $qc; @@ -82,7 +96,29 @@ foreach my $prop (sort keys %data) die; } printf "\t{0x%04X, %s},\n", $cp, $qc; + + # Save the bytes as a string in network order. + push @cp_packed, pack('N', $cp); } print "};\n"; + + # Emit the definition of the hash function. + + # To save space, skip building the function for the larger "C" form. + next if $prop eq "NFKC_QC"; + + my $funcname = $prop . '_hash_func'; + + my $f = PerfectHash::generate_hash_function(\@cp_packed, $funcname, + fixed_key_length => 4); + print "\nstatic $f\n"; + + # Emit the struct that wraps the hash lookup info into one variable. + printf "static const unicode_norm_info "; + printf "UnicodeNormInfo_%s = {\n", $prop; + printf "\tUnicodeNormProps_%s,\n", $prop; + printf "\t%s,\n", $funcname; + printf "\t%d\n", scalar @cp_packed; + printf "};\n"; } diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c index ab5ce59345..1714837e64 100644 --- a/src/common/unicode_norm.c +++ b/src/common/unicode_norm.c @@ -476,6 +476,34 @@ qc_compare(const void *p1, const void *p2) return (v1 - v2); } +static const pg_unicode_normprops * +qc_hash_lookup(pg_wchar ch, const unicode_norm_info * norminfo) +{ + int h; + uint32 hashkey; + + /* + * Compute the hash function. The hash key is the codepoint with the bytes + * in network order. + */ + hashkey = htonl(ch); + h = norminfo->hash(&hashkey); + + /* An out-of-range result implies no match */ + if (h < 0 || h >= norminfo->num_normprops) + return NULL; + + /* + * Since it's a perfect hash, we need only match to the specific codepoint + * it identifies. + */ + if (ch != norminfo->normprops[h].codepoint) + return NULL; + + /* Success! */ + return &norminfo->normprops[h]; +} + /* * Look up the normalization quick check character property */ @@ -483,18 +511,14 @@ static UnicodeNormalizationQC qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch) { pg_unicode_normprops key; - pg_unicode_normprops *found = NULL; + const pg_unicode_normprops *found = NULL; key.codepoint = ch; switch (form) { case UNICODE_NFC: - found = bsearch(&key, - UnicodeNormProps_NFC_QC, - lengthof(UnicodeNormProps_NFC_QC), - sizeof(pg_unicode_normprops), - qc_compare); + found = qc_hash_lookup(ch, &UnicodeNormInfo_NFC_QC); break; case UNICODE_NFKC: found = bsearch(&key, diff --git a/src/include/common/unicode_normprops_table.h b/src/include/common/unicode_normprops_table.h index 93a2e55b75..4e8d2c46d6 100644 --- a/src/include/common/unicode_normprops_table.h +++ b/src/include/common/unicode_normprops_table.h @@ -2,6 +2,8 @@ #include "common/unicode_norm.h" +typedef int (*qc_hash_func) (const void *key); + /* * We use a bit field here to save space. */ @@ -11,6 +13,13 @@ typedef struct signed int quickcheck:4; /* really UnicodeNormalizationQC */ } pg_unicode_normprops; +typedef struct +{ + const pg_unicode_normprops *normprops; + qc_hash_func hash; + int num_normprops; +} unicode_norm_info; + static const pg_unicode_normprops UnicodeNormProps_NFC_QC[] = { {0x0300, UNICODE_NORM_QC_MAYBE}, {0x0301, UNICODE_NORM_QC_MAYBE}, @@ -1245,6 +1254,341 @@ static const pg_unicode_normprops UnicodeNormProps_NFC_QC[] = { {0x2FA1D, UNICODE_NORM_QC_NO}, }; +static int +NFC_QC_hash_func(const void *key) +{ + static const int16 h[2463] = { + 0, -2717, 0, 221, 1293, 223, 1295, 225, + 226, 241, 0, 229, 230, 231, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + -386, 0, 0, 0, 0, 0, 0, 0, + -163, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + -246, -175, 1260, 0, 0, 0, -174, -173, + 0, -172, 0, 0, 0, 0, 0, 0, + 1049, 0, 300, 301, 1071, 0, 1071, 0, + 1071, 1071, 1057, 0, 0, 0, 0, 1061, + 0, -1053, 1664, 0, 2956, 0, 0, -13, + 0, 0, 0, 0, 2156, 0, 0, 0, + 0, 0, 0, 0, 71, 0, 1082, 0, + 1083, 1083, 0, 1084, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 359, 360, 361, + -1091, 363, -762, -130, -129, -128, -127, -126, + 137, -124, -708, -707, -706, -120, -185, -705, + -117, -184, -1307, -114, -113, -112, -111, 0, + 386, 387, 388, 389, -90, 391, 171, 172, + 394, -94, -183, 397, 398, 399, -98, -225, + 402, -1019, -636, -1019, -225, 407, 408, 409, + 410, 411, 674, 413, -171, -170, -169, 417, + 352, -168, 420, 353, -770, 423, 424, 425, + 426, 427, 428, 32767, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 32767, 32767, 237, 32767, 236, 32767, + 32767, 234, 234, 234, 234, 617, 234, 234, + 234, -2483, 234, -1430, 1526, -1430, 1527, 47, + 48, 471, 230, 32767, 32767, 32767, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + -159, 227, 227, 227, 227, 227, 227, 227, + 64, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + 227, 227, 227, 227, 227, 227, 227, 227, + -19, 52, 1487, 227, 227, 227, 53, 54, + 227, 55, 227, 227, 227, 227, 227, 227, + 1276, 227, -989, 32767, 1296, 225, 1296, 225, + 1296, 1296, 1282, 225, 225, 225, 225, 1286, + 225, -828, 1889, 225, 3181, 225, 225, 212, + 225, 225, 225, 225, 2381, 225, 225, 225, + 225, 225, 225, 225, 296, 225, 1307, 225, + 1308, 1308, 225, 1309, 225, 225, 225, 225, + 225, 225, 225, 225, 225, 225, 225, 225, + 225, 225, 225, 225, 225, 584, 585, 586, + -866, 588, -537, 95, 96, 97, 98, 99, + 362, 101, -483, -482, -481, 105, 40, -480, + 108, 41, -1082, 111, 112, 113, 114, 225, + 611, 612, 613, 614, 135, 616, 396, 397, + 619, 131, 42, 622, 623, 624, 127, 0, + 627, -794, -411, -794, 0, 632, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + -272, 32767, 32767, 32767, 0, 32767, 32767, 32767, + 32767, 32767, -166, -165, 32767, 32767, 32767, 32767, + -164, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 397, 32767, 396, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 386, + 0, 386, 386, 386, 386, 386, 386, 386, + 223, 386, 386, 386, 32767, 385, 385, 385, + 385, 385, 32767, 384, 32767, 383, 383, 32767, + 382, 382, 32767, 381, 381, 381, 381, 381, + 135, 206, 1641, 381, 32767, 32767, 32767, 32767, + 32767, 32767, -160, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 1148, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 0, + 32767, 32767, 32767, 0, 0, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, -257, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, -910, -910, 32767, 32767, + 0, 32767, 0, 32767, 0, 32767, 0, 32767, + 147, 32767, 0, 32767, 0, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 0, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 143, 32767, 144, 32767, 145, + 32767, 146, 32767, 0, 32767, 148, 32767, 149, + 32767, 32767, 32767, -160, 32767, 32767, 32767, 32767, + 32767, 32767, 15, 32767, 32767, 0, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 145, 32767, 144, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 0, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 0, -148, 32767, 32767, 32767, 32767, + 32767, 32767, 2009, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 0, 32767, 32767, 135, -918, 32767, + 151, 32767, 32767, 0, 1, 2, 3, 4, + 133, 5, 6, 7, 8, 9, 10, 11, + 32767, 32767, -1248, 32767, 13, 154, 188, 188, + 32767, 32767, 32767, 32767, 32767, 155, 16, 32767, + 32767, 32767, 32767, 32767, 32767, -1853, -1054, 18, + -1052, -1051, -1036, 22, 32767, 157, 32767, 28, + 23, 1077, 673, 25, -2930, 0, 32767, 32767, + 32767, 32767, 32767, 27, 32767, 155, 32767, 154, + 32767, 32767, -62, 28, -42, 30, -1051, 32, + -1050, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 34, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 129, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 672, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 0, 32767, + 32767, 32767, 32767, 32767, -156, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, -155, 32767, 32767, + 32767, 0, 0, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 73, 32767, 32767, 32767, 32767, 74, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 675, + 32767, 32767, 32767, 32767, 32767, 75, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 165, 32767, 32767, 32767, 166, 167, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 170, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 689, 690, 691, 692, 693, 694, 695, + 696, 697, 698, 699, 700, 701, 702, 703, + 704, 705, 706, 707, 708, 709, 710, 711, + 712, 713, 714, 715, 716, 717, 718, 719, + 720, 721, 722, -304, -303, -302, -301, -300, + -299, -298, -297, 930, -295, -294, -293, -292, + -291, -290, -289, -288, -287, -286, -285, -284, + -283, -282, -281, -280, -279, -278, -277, -276, + -275, 753, 754, 755, 646, 757, -712, -1765, + 952, -712, 2244, -712, 2245, 765, 766, 767, + 768, 125, 770, 771, 772, 773, 774, 775, + 603, 777, 778, 779, 780, 781, 782, 783, + 784, 2011, 786, 787, 788, 789, 790, 791, + 792, 793, 794, 795, 796, 797, 798, 799, + 800, 801, 802, 803, 804, 805, 806, 603, + 603, 809, 603, 811, 603, 603, 814, 815, + 816, 817, 435, 819, 820, 821, 3539, 823, + 603, -468, 603, -468, 603, 603, 589, 831, + 603, 603, 603, 835, 836, 837, 838, 839, + 840, 841, 842, 843, 844, 845, 846, 847, + 848, 849, 850, 851, 852, 1239, 854, 855, + 856, 857, 858, 859, 860, 1024, 862, 863, + 864, 865, 866, 867, 868, 869, 870, 871, + 872, 873, 874, 875, 876, 877, 878, 879, + 880, 881, 882, 883, 884, 1131, 1061, -373, + 888, 889, 890, 1065, 1065, 893, 1066, 895, + 896, 897, 898, 899, 900, -148, 902, 603, + 603, -166, 906, -164, 908, -162, -161, -146, + 912, 913, 914, 915, -145, 917, 1971, -745, + 920, -2035, 922, 923, 937, 925, 926, 927, + 928, -1227, 930, 931, 932, 933, 934, 935, + 936, 866, 938, -143, 940, -142, -141, 943, + -140, 32767, 945, 946, 947, 948, 949, 950, + 951, 952, 953, 954, 955, 956, 957, 958, + 959, 960, 961, -65, -64, -63, -62, -61, + -60, -59, -58, 1169, -56, -55, -54, -53, + -52, -51, -50, -49, -48, -47, -46, -45, + -44, -43, -42, -41, -40, -39, -38, -37, + -36, 992, 993, 994, 885, 996, -473, -1526, + 1191, -473, 2483, -473, 2484, 1004, 1005, 1006, + 1007, 364, 1009, 1010, 1011, 1012, 1013, 1014, + 842, 1016, 1017, 1018, 1019, 1020, 1021, 1022, + 1023, 2250, 1025, 1026, 1027, 1028, 1029, 1030, + 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, + 1039, 1040, 1041, 1042, 1043, 1044, 1045, 842, + 842, 1048, 842, 1050, 842, 842, 1053, 1054, + 1055, 1056, 674, 1058, 1059, 1060, 3778, 1062, + 842, -229, 842, -229, 842, 842, 828, 1070, + 842, 842, 842, 1074, 1075, 1076, 1077, 1078, + 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, + 1087, 1088, 1089, 1090, 1091, 1478, 1093, 1094, + 1095, 1096, 1097, 1098, 1099, 1263, 1101, 1102, + 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, + 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, + 1119, 1120, 1121, 1122, 1123, 1370, 1300, -134, + 1127, 1128, 1129, 1304, 1304, 1132, 1305, 1134, + 1135, 1136, 1137, 1138, 1139, 91, 1141, 842, + 842, 73, 1145, 75, 1147, 77, 78, 93, + 1151, 1152, 1153, 1154, 94, 1156, 2210, -506, + 1159, -1796, 1161, 1162, 1176, 1164, 1165, 1166, + 1167, -988, 1169, 1170, 1171, 1172, 1173, 1174, + 1175, 1105, 1177, 96, 1179, 97, 98, 1182, + 99, 1184, 1185, 1186, 1187, 1188, 1189, 1190, + 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, + 1199, 1200, 0, 174, 175, 176, 177, 178, + 179, 180, 181, 1408, 183, 184, 185, 186, + 187, 188, 189, 190, 191, 192, 193, 194, + 195, 196, 197, 198, 199, 200, 201, 202, + 203, 0, 0, 206, 0, 208, 0, 0, + 211, 212, 213, 214, -168, 216, 217, 218, + 2936, 220, 0, -1071, 0, -1071, 0, 0, + -14, 228, 0, 0, 0, 232, 233, 234, + 235, 236, 237, 238, 239, 240, 241, 242, + 243, 244, 245, 246, 247, 248, 249, 636, + 251, 252, 253, 254, 255, 256, 257, 421, + 259, 260, 261, 262, 263, 264, 265, 266, + 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 280, 281, 528, + 458, -976, 285, 286, 287, 462, 462, 290, + 463, 292, 293, 294, 295, 296, 297, -751, + 299, 0, 0, -769, 303, -767, 305, -765, + -764, -749, 309, 310, 311, 312, -748, 314, + 1368, -1348, 317, -2638, 319, 320, 334, 322, + 323, 324, 325, -1830, 327, 328, 329, 330, + 331, 332, 333, 263, 335, -746, 337, -745, + -744, 340, -743, 342, 343, 344, 345, 346, + 347, 348, 349, 350, 351, 352, 353, 354, + 355, 356, 357, 358, 0, 0, 0, 1453, + 0, 1126, 495, 495, 495, 495, 495, 233, + 495, 1080, 1080, 1080, 495, 561, 1082, 495, + 563, 1687, 495, 495, 495, 495, 385, 0, + 0, 0, 0, 480, 0, 221, 221, 0, + 489, 579, 0, 0, 0, 498, 626, 0, + 1422, 1040, 1424, 631, 0, 0, 0, 0, + 0, -262, 0, 585, 585, 585, 0, 66, + 587, 0, 68, 1192, 0, 0, 0, 0, + 0, 0, 32767, 32767, 32767, 32767, 669, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 670, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 142, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 115, 116, 117, 118, 119, 120, + 121, 122, 123, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, 134, 135, 136, + 137, 138, 139, 140, 141, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1027, 1027, 1027, + 1027, 1027, 1027, 1027, 1027, -199, 1027, 1027, + 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, + 1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027, + 1027, 1027, 1027, 0, 0, 0, 110, 0, + 1470, 2524, -192, 1473, -1482, 1475, -1481, 0, + 0, 0, 0, 644, 0, 0, 0, 0, + 0, 0, 173, 0, 0, 0, 0, 0, + 0, 0, 0, -1226, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 204, 205, 0, 207, 0, 209, 210, + 0, 0, 0, 0, 383, 0, 0, + }; + + const unsigned char *k = (const unsigned char *) key; + size_t keylen = 4; + uint32 a = 0; + uint32 b = 0; + + while (keylen--) + { + unsigned char c = *k++; + + a = a * 257 + c; + b = b * 17 + c; + } + return h[a % 2463] + h[b % 2463]; +} + +static const unicode_norm_info UnicodeNormInfo_NFC_QC = { + UnicodeNormProps_NFC_QC, + NFC_QC_hash_func, + 1231 +}; + static const pg_unicode_normprops UnicodeNormProps_NFKC_QC[] = { {0x00A0, UNICODE_NORM_QC_NO}, {0x00A8, UNICODE_NORM_QC_NO}, diff --git a/src/tools/PerfectHash.pm b/src/tools/PerfectHash.pm index d6841589a3..26efa7ad70 100644 --- a/src/tools/PerfectHash.pm +++ b/src/tools/PerfectHash.pm @@ -124,7 +124,7 @@ sub generate_hash_function $f .= sprintf "\tstatic const %s h[%d] = {\n", $elemtype, $nhash; for (my $i = 0; $i < $nhash; $i++) { - $f .= sprintf "%s%6d,%s", + $f .= sprintf "%s%d,%s", ($i % 8 == 0 ? "\t\t" : " "), $hashtab[$i], ($i % 8 == 7 ? "\n" : ""); -- 2.22.0