#!perl use strict; use warnings; use utf8; use open qw/:std :utf8/; my $substr = 3; # まとめ文字数 my $file = shift @ARGV; open my $fh, "<", $file or die; my %match; my $fno = ''; my $lno = 1; my $llno = 0; my $skip = 0; while (my $line = <$fh>) { chomp $line; $llno++; if (not $skip) { $skip = 1 if $line eq '(扉)'; $fno = '扉' if $skip; next; } if ($line =~ /^((.+))$/) { $fno = $1; $lno = 1; next; } next if $line !~ /^\d/; while ($line =~ /\b\w*zzu\w*\b/ig) { my $m = my $m_mod = $&; $m_mod = lc $m_mod; $m_mod =~ s/zzu/zu/ig; $m_mod =~ s/gi/ji/ig; my $m3 = substr $m_mod, 0, $substr; push @{$match{$m3}{zzu}}, [$m, $fno, $lno, $llno]; } while ($line =~ /\b\w*(?<!z)zu\w*\b/ig) { my $m = my $m_mod = $&; $m_mod = lc $m_mod; $m_mod =~ s/zzu/zu/ig; $m_mod =~ s/gi/ji/ig; my $m3 = substr $m_mod, 0, $substr; push @{$match{$m3}{zu}}, [$m, $fno, $lno, $llno]; } while ($line =~ /\b\w*ji\w*\b/ig) { my $m = my $m_mod = $&; $m_mod = lc $m_mod; $m_mod =~ s/zzu/zu/ig; $m_mod =~ s/gi/ji/ig; my $m3 = substr $m_mod, 0, $substr; push @{$match{$m3}{ji}}, [$m, $fno, $lno, $llno]; } while ($line =~ /\b\w*gi\w*\b/ig) { my $m = my $m_mod = $&; $m_mod = lc $m_mod; $m_mod =~ s/zzu/zu/ig; $m_mod =~ s/gi/ji/ig; my $m3 = substr $m_mod, 0, $substr; push @{$match{$m3}{gi}}, [$m, $fno, $lno, $llno]; } $lno++; } my @keys = sort keys %match; for my $key (@keys) { if (exists $match{$key}{ji} && exists $match{$key}{gi} or exists $match{$key}{zu} && exists $match{$key}{zzu}) { my $m = $match{$key}; my @yotsu_keys = sort %{$m}; for my $yotsu_key (@yotsu_keys) { for my $y (@{$m->{$yotsu_key}}) { printf "%s\t%s\t%s\t%s\t%s\t%s\n", $key, $yotsu_key, @$y; } } } }
ばあいによって、テキストデータのBOMを落としたり改行コードを直したりする必要はある。対比ではなく全例を見たいときは、if (exists…)のif文をコメントアウトすればよい。