From cf29ab1933e0284c8db06f9aaf06561c99cefa0a Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson <daniel@yesql.se>
Date: Mon, 31 Oct 2016 15:55:47 +0100
Subject: [PATCH 2/5] Make all scripts use strict and rearrange logic

Enabling "use strict" enforces good hygiene in the code and protects
against accidentally introducing new variables (and with them subtle
bugs) through typos in variable names, among other things. Also
restructure a few loops to rely less on early "next" statements, which
makes them easier to read.
---
 src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl      | 13 ++--
 .../utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl        | 43 ++++++------
 src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl      |  3 +-
 src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl      |  3 +-
 src/backend/utils/mb/Unicode/UCS_to_GB18030.pl     | 13 ++--
 src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl       |  3 +-
 .../utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl      | 77 ++++++++++------------
 src/backend/utils/mb/Unicode/UCS_to_UHC.pl         | 13 ++--
 src/backend/utils/mb/Unicode/UCS_to_most.pl        | 10 +--
 src/backend/utils/mb/Unicode/convutils.pm          |  2 +
 10 files changed, 90 insertions(+), 90 deletions(-)

diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
index a290931..d9e112b 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
@@ -13,13 +13,14 @@
 # where the "u" field is the Unicode code point in hex,
 # and the "b" field is the hex byte sequence for GB18030
 
+use strict;
 require "convutils.pm";
 
-$this_script = $0;
+my $this_script = $0;
 
 # Read the input
 
-$in_file = "gb-18030-2000.xml";
+my $in_file = "gb-18030-2000.xml";
 
 open(my $fd, '<', $in_file) || die("cannot open $in_file");
 
@@ -28,11 +29,11 @@ my @mapping;
 while (<$fd>)
 {
 	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-	$u = $1;
-	$c = $2;
+	my $u = $1;
+	my $c = $2;
 	$c =~ s/ //g;
-	$ucs  = hex($u);
-	$code = hex($c);
+	my $ucs  = hex($u);
+	my $code = hex($c);
 
 	# The GB-18030 character set, which we use as the source, contains
 	# a lot of extra characters on top of the GB2312 character set that
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
index aff0d35..b170df7 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
@@ -7,13 +7,14 @@
 # Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
 # "euc-jis-2004-std.txt" (http://x0213.org)
 
+use strict;
 require "convutils.pm";
 
-$this_script = $0;
+my $this_script = $0;
 
 # first generate UTF-8 --> EUC_JIS_2004 table
 
-$in_file = "euc-jis-2004-std.txt";
+my $in_file = "euc-jis-2004-std.txt";
 
 open(my $fd, '<', $in_file) || die("cannot open $in_file");
 
@@ -23,38 +24,36 @@ while (my $line = <$fd>)
 {
 	if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
 	{
-		$c              = $1;
-		$u1             = $2;
-		$u2             = $3;
-		$rest           = "U+" . $u1 . "+" . $u2 . $4;
-		$code           = hex($c);
-		$ucs1           = hex($u1);
-		$ucs2           = hex($u2);
+		my $c              = $1;
+		my $u1             = $2;
+		my $u2             = $3;
+		my $rest           = "U+" . $u1 . "+" . $u2 . $4;
+		my $code           = hex($c);
+		my $ucs1           = hex($u1);
+		my $ucs2           = hex($u2);
 
 		push @all, { direction => 'both',
 					 ucs => $ucs1,
 					 ucs_second => $ucs2,
 					 code => $code,
 					 comment => $rest };
-		next;
 	}
 	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
 	{
-		$c    = $1;
-		$u    = $2;
-		$rest = "U+" . $u . $3;
-	}
-	else
-	{
-		next;
-	}
+		my $c    = $1;
+		my $u    = $2;
+		my $rest = "U+" . $u . $3;
 
-	$ucs  = hex($u);
-	$code = hex($c);
+		my $ucs  = hex($u);
+		my $code = hex($c);
 
-	next if ($code < 0x80 && $ucs < 0x80);
+		next if ($code < 0x80 && $ucs < 0x80);
 
-	push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
+		push @all, { direction => 'both',
+					 ucs => $ucs,
+					 code => $code,
+					 comment => $rest };
+	}
 }
 close($fd);
 
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
index a00d25c..aa8f2f7 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
@@ -16,9 +16,10 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
 
+use strict;
 require "convutils.pm";
 
-$this_script = $0;
+my $this_script = $0;
 
 # Load the source file.
 
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
index 995657e..e5a9805 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
@@ -17,9 +17,10 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
 
+use strict;
 require "convutils.pm";
 
-$this_script = $0;
+my $this_script = $0;
 
 my $mapping = &read_source("CNS11643.TXT");
 
diff --git a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
index c1ade68..91fb9f6 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
@@ -13,13 +13,14 @@
 # where the "u" field is the Unicode code point in hex,
 # and the "b" field is the hex byte sequence for GB18030
 
+use strict;
 require "convutils.pm";
 
-$this_script = $0;
+my $this_script = $0;
 
 # Read the input
 
-$in_file = "gb-18030-2000.xml";
+my $in_file = "gb-18030-2000.xml";
 
 open(my $fd, '<', $in_file) || die("cannot open $in_file");
 
@@ -28,11 +29,11 @@ my @mapping;
 while (<$fd>)
 {
 	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-	$u = $1;
-	$c = $2;
+	my $u = $1;
+	my $c = $2;
 	$c =~ s/ //g;
-	$ucs  = hex($u);
-	$code = hex($c);
+	my $ucs  = hex($u);
+	my $code = hex($c);
 	if ($code >= 0x80 && $ucs >= 0x0080)
 	{
 		push @mapping, {
diff --git a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
index 50735eb..6c8a8c5 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
@@ -15,9 +15,10 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
 
+use strict;
 require "convutils.pm";
 
-$this_script = $0;
+my $this_script = $0;
 
 # Load the source file.
 
diff --git a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
index 86ed705..cfe3cce 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
@@ -7,13 +7,14 @@
 # Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
 # "sjis-0213-2004-std.txt" (http://x0213.org)
 
+use strict;
 require "convutils.pm";
 
 # first generate UTF-8 --> SHIFT_JIS_2004 table
 
-$this_script = $0;
+my $this_script = $0;
 
-$in_file = "sjis-0213-2004-std.txt";
+my $in_file = "sjis-0213-2004-std.txt";
 
 open(my $fd, '<', $in_file) || die("cannot open $in_file");
 
@@ -23,13 +24,13 @@ while (my $line = <$fd>)
 {
 	if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
 	{
-		$c              = $1;
-		$u1             = $2;
-		$u2             = $3;
-		$rest           = "U+" . $u1 . "+" . $u2 . $4;
-		$code           = hex($c);
-		$ucs1           = hex($u1);
-		$ucs2           = hex($u2);
+		my $c              = $1;
+		my $u1             = $2;
+		my $u2             = $3;
+		my $rest           = "U+" . $u1 . "+" . $u2 . $4;
+		my $code           = hex($c);
+		my $ucs1           = hex($u1);
+		my $ucs2           = hex($u2);
 
 		push @mapping, {
 			code => $code,
@@ -38,45 +39,37 @@ while (my $line = <$fd>)
 			comment => $rest,
 			direction => 'both'
 		};
-		next;
 	}
 	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
 	{
-		$c    = $1;
-		$u    = $2;
-		$rest = "U+" . $u . $3;
-	}
-	else
-	{
-		next;
-	}
+		my $direction = 'both';
+		my $c    	  = $1;
+		my $u   	  = $2;
+		my $rest 	  = "U+" . $u . $3;
 
-	$ucs  = hex($u);
-	$code = hex($c);
+		my $ucs  = hex($u);
+		my $code = hex($c);
 
-	if ($code < 0x80 && $ucs < 0x80)
-	{
-		next;
-	}
-	elsif ($code < 0x80)
-	{
-		$direction = 'from_unicode';
-	}
-	elsif ($ucs < 0x80)
-	{
-		$direction = 'to_unicode';
-	}
-	else
-	{
-		$direction = 'both';
-	}
+		if ($code < 0x80 && $ucs < 0x80)
+		{
+			next;
+		}
+		elsif ($code < 0x80)
+		{
+			$direction = 'from_unicode';
+		}
+		elsif ($ucs < 0x80)
+		{
+			$direction = 'to_unicode';
+		}
 
-	push @mapping, {
-		code => $code,
-		ucs => $ucs,
-		comment => $rest,
-		direction => $direction
-	};
+		push @mapping, {
+			code => $code,
+			ucs => $ucs,
+			comment => $rest,
+			direction => $direction
+		};
+	}
 }
 close($fd);
 
diff --git a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
index e49e5c9..17f58d3 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
@@ -13,13 +13,14 @@
 # where the "u" field is the Unicode code point in hex,
 # and the "b" field is the hex byte sequence for UHC
 
+use strict;
 require "convutils.pm";
 
-$this_script = $0;
+my $this_script = $0;
 
 # Read the input
 
-$in_file = "windows-949-2000.xml";
+my $in_file = "windows-949-2000.xml";
 
 open(my $in, '<', $in_file) || die("cannot open $in_file");
 
@@ -28,11 +29,11 @@ my @mapping;
 while (<$in>)
 {
 	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
-	$u = $1;
-	$c = $2;
+	my $u = $1;
+	my $c = $2;
 	$c =~ s/ //g;
-	$ucs  = hex($u);
-	$code = hex($c);
+	my $ucs  = hex($u);
+	my $code = hex($c);
 
 	next if ($code == 0x0080 || $code == 0x00FF);
 
diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl
index 631214e..23bcb55 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_most.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl
@@ -15,11 +15,12 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
 
+use strict;
 require "convutils.pm";
 
-$this_script = $0;
+my $this_script = $0;
 
-%filename = (
+my %filename = (
 	'WIN866'     => 'CP866.TXT',
 	'WIN874'     => 'CP874.TXT',
 	'WIN1250'    => 'CP1250.TXT',
@@ -48,9 +49,8 @@ $this_script = $0;
 	'KOI8U'      => 'KOI8-U.TXT',
 	'GBK'        => 'CP936.TXT');
 
-@charsets = keys(%filename);
-@charsets = @ARGV if scalar(@ARGV);
-foreach $charset (@charsets)
+my @charsets = (scalar(@ARGV) > 0) ? @ARGV : keys(%filename);
+foreach my $charset (@charsets)
 {
 	my $mapping = &read_source($filename{$charset});
 
diff --git a/src/backend/utils/mb/Unicode/convutils.pm b/src/backend/utils/mb/Unicode/convutils.pm
index cb0c596..7561aca 100644
--- a/src/backend/utils/mb/Unicode/convutils.pm
+++ b/src/backend/utils/mb/Unicode/convutils.pm
@@ -3,6 +3,8 @@
 #
 # src/backend/utils/mb/Unicode/convutils.pm
 
+use strict;
+
 #######################################################################
 # convert UCS-4 to UTF-8
 #
-- 
2.6.4 (Apple Git-63)