From cf29ab1933e0284c8db06f9aaf06561c99cefa0a Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Mon, 31 Oct 2016 15:55:47 +0100 Subject: [PATCH 2/5] Make all scripts use strict and rearrange logic. "use strict" enforces good hygiene in the code and protects against accidentally introducing new variables (and with them subtle bugs) through typos in variable names, among other things. Also rearrange a few loops to avoid breaking out of them early, which makes them easier to read. --- src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl | 13 ++-- .../utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl | 43 ++++++------ src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl | 3 +- src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl | 3 +- src/backend/utils/mb/Unicode/UCS_to_GB18030.pl | 13 ++-- src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl | 3 +- .../utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl | 77 ++++++++++------------ src/backend/utils/mb/Unicode/UCS_to_UHC.pl | 13 ++-- src/backend/utils/mb/Unicode/UCS_to_most.pl | 10 +-- src/backend/utils/mb/Unicode/convutils.pm | 2 + 10 files changed, 90 insertions(+), 90 deletions(-) diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl index a290931..d9e112b 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl @@ -13,13 +13,14 @@ # where the "u" field is the Unicode code point in hex, # and the "b" field is the hex byte sequence for GB18030 +use strict; require "convutils.pm"; -$this_script = $0; +my $this_script = $0; # Read the input -$in_file = "gb-18030-2000.xml"; +my $in_file = "gb-18030-2000.xml"; open(my $fd, '<', $in_file) || die("cannot open $in_file"); @@ -28,11 +29,11 @@ my @mapping; while (<$fd>) { next if (!m/ EUC_JIS_2004 code conversion tables from # "euc-jis-2004-std.txt" (http://x0213.org) +use strict; require "convutils.pm"; -$this_script = $0; +my $this_script = $0; # first generate UTF-8 --> EUC_JIS_2004 table -$in_file = "euc-jis-2004-std.txt"; +my 
$in_file = "euc-jis-2004-std.txt"; open(my $fd, '<', $in_file) || die("cannot open $in_file"); @@ -23,38 +24,36 @@ while (my $line = <$fd>) { if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u1 = $2; - $u2 = $3; - $rest = "U+" . $u1 . "+" . $u2 . $4; - $code = hex($c); - $ucs1 = hex($u1); - $ucs2 = hex($u2); + my $c = $1; + my $u1 = $2; + my $u2 = $3; + my $rest = "U+" . $u1 . "+" . $u2 . $4; + my $code = hex($c); + my $ucs1 = hex($u1); + my $ucs2 = hex($u2); push @all, { direction => 'both', ucs => $ucs1, ucs_second => $ucs2, code => $code, comment => $rest }; - next; } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u = $2; - $rest = "U+" . $u . $3; - } - else - { - next; - } + my $c = $1; + my $u = $2; + my $rest = "U+" . $u . $3; - $ucs = hex($u); - $code = hex($c); + my $ucs = hex($u); + my $code = hex($c); - next if ($code < 0x80 && $ucs < 0x80); + next if ($code < 0x80 && $ucs < 0x80); - push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest }; + push @all, { direction => 'both', + ucs => $ucs, + code => $code, + comment => $rest }; + } } close($fd); diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl index a00d25c..aa8f2f7 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl @@ -16,9 +16,10 @@ # UCS-2 code in hex # # and Unicode name (not used in this script) +use strict; require "convutils.pm"; -$this_script = $0; +my $this_script = $0; # Load the source file. 
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl index 995657e..e5a9805 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl @@ -17,9 +17,10 @@ # UCS-2 code in hex # # and Unicode name (not used in this script) +use strict; require "convutils.pm"; -$this_script = $0; +my $this_script = $0; my $mapping = &read_source("CNS11643.TXT"); diff --git a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl index c1ade68..91fb9f6 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl @@ -13,13 +13,14 @@ # where the "u" field is the Unicode code point in hex, # and the "b" field is the hex byte sequence for GB18030 +use strict; require "convutils.pm"; -$this_script = $0; +my $this_script = $0; # Read the input -$in_file = "gb-18030-2000.xml"; +my $in_file = "gb-18030-2000.xml"; open(my $fd, '<', $in_file) || die("cannot open $in_file"); @@ -28,11 +29,11 @@ my @mapping; while (<$fd>) { next if (!m/= 0x80 && $ucs >= 0x0080) { push @mapping, { diff --git a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl index 50735eb..6c8a8c5 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl @@ -15,9 +15,10 @@ # UCS-2 code in hex # # and Unicode name (not used in this script) +use strict; require "convutils.pm"; -$this_script = $0; +my $this_script = $0; # Load the source file. 
diff --git a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl index 86ed705..cfe3cce 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl @@ -7,13 +7,14 @@ # Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from # "sjis-0213-2004-std.txt" (http://x0213.org) +use strict; require "convutils.pm"; # first generate UTF-8 --> SHIFT_JIS_2004 table -$this_script = $0; +my $this_script = $0; -$in_file = "sjis-0213-2004-std.txt"; +my $in_file = "sjis-0213-2004-std.txt"; open(my $fd, '<', $in_file) || die("cannot open $in_file"); @@ -23,13 +24,13 @@ while (my $line = <$fd>) { if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u1 = $2; - $u2 = $3; - $rest = "U+" . $u1 . "+" . $u2 . $4; - $code = hex($c); - $ucs1 = hex($u1); - $ucs2 = hex($u2); + my $c = $1; + my $u1 = $2; + my $u2 = $3; + my $rest = "U+" . $u1 . "+" . $u2 . $4; + my $code = hex($c); + my $ucs1 = hex($u1); + my $ucs2 = hex($u2); push @mapping, { code => $code, @@ -38,45 +39,37 @@ while (my $line = <$fd>) comment => $rest, direction => 'both' }; - next; } elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/) { - $c = $1; - $u = $2; - $rest = "U+" . $u . $3; - } - else - { - next; - } + my $direction = 'both'; + my $c = $1; + my $u = $2; + my $rest = "U+" . $u . 
$3; - $ucs = hex($u); - $code = hex($c); + my $ucs = hex($u); + my $code = hex($c); - if ($code < 0x80 && $ucs < 0x80) - { - next; - } - elsif ($code < 0x80) - { - $direction = 'from_unicode'; - } - elsif ($ucs < 0x80) - { - $direction = 'to_unicode'; - } - else - { - $direction = 'both'; - } + if ($code < 0x80 && $ucs < 0x80) + { + next; + } + elsif ($code < 0x80) + { + $direction = 'from_unicode'; + } + elsif ($ucs < 0x80) + { + $direction = 'to_unicode'; + } - push @mapping, { - code => $code, - ucs => $ucs, - comment => $rest, - direction => $direction - }; + push @mapping, { + code => $code, + ucs => $ucs, + comment => $rest, + direction => $direction + }; + } } close($fd); diff --git a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl index e49e5c9..17f58d3 100755 --- a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl +++ b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl @@ -13,13 +13,14 @@ # where the "u" field is the Unicode code point in hex, # and the "b" field is the hex byte sequence for UHC +use strict; require "convutils.pm"; -$this_script = $0; +my $this_script = $0; # Read the input -$in_file = "windows-949-2000.xml"; +my $in_file = "windows-949-2000.xml"; open(my $in, '<', $in_file) || die("cannot open $in_file"); @@ -28,11 +29,11 @@ my @mapping; while (<$in>) { next if (!m/ 'CP866.TXT', 'WIN874' => 'CP874.TXT', 'WIN1250' => 'CP1250.TXT', @@ -48,9 +49,8 @@ $this_script = $0; 'KOI8U' => 'KOI8-U.TXT', 'GBK' => 'CP936.TXT'); -@charsets = keys(%filename); -@charsets = @ARGV if scalar(@ARGV); -foreach $charset (@charsets) +my @charsets = (scalar(@ARGV) > 0) ? 
@ARGV : keys(%filename); +foreach my $charset (@charsets) { my $mapping = &read_source($filename{$charset}); diff --git a/src/backend/utils/mb/Unicode/convutils.pm b/src/backend/utils/mb/Unicode/convutils.pm index cb0c596..7561aca 100644 --- a/src/backend/utils/mb/Unicode/convutils.pm +++ b/src/backend/utils/mb/Unicode/convutils.pm @@ -3,6 +3,8 @@ # # src/backend/utils/mb/Unicode/convutils.pm +use strict; + ####################################################################### # convert UCS-4 to UTF-8 # -- 2.6.4 (Apple Git-63)