#!perl
use strict;
use warnings;
use utf8;
use open qw/:std :utf8/;
my $substr = 3;
my $file = shift @ARGV;
open my $fh, "<", $file or die;
my %match;
my $fno = '';
my $lno = 1;
my $llno = 0;
my $skip = 0;
while (my $line = <$fh>) {
chomp $line;
$llno++;
if (not $skip) {
$skip = 1 if $line eq '(扉)';
$fno = '扉' if $skip;
next;
}
if ($line =~ /^((.+))$/) {
$fno = $1;
$lno = 1;
next;
}
next if $line !~ /^\d/;
while ($line =~ /\b\w*zzu\w*\b/ig) {
my $m = my $m_mod = $&;
$m_mod = lc $m_mod;
$m_mod =~ s/zzu/zu/ig;
$m_mod =~ s/gi/ji/ig;
my $m3 = substr $m_mod, 0, $substr;
push @{$match{$m3}{zzu}}, [$m, $fno, $lno, $llno];
}
while ($line =~ /\b\w*(?<!z)zu\w*\b/ig) {
my $m = my $m_mod = $&;
$m_mod = lc $m_mod;
$m_mod =~ s/zzu/zu/ig;
$m_mod =~ s/gi/ji/ig;
my $m3 = substr $m_mod, 0, $substr;
push @{$match{$m3}{zu}}, [$m, $fno, $lno, $llno];
}
while ($line =~ /\b\w*ji\w*\b/ig) {
my $m = my $m_mod = $&;
$m_mod = lc $m_mod;
$m_mod =~ s/zzu/zu/ig;
$m_mod =~ s/gi/ji/ig;
my $m3 = substr $m_mod, 0, $substr;
push @{$match{$m3}{ji}}, [$m, $fno, $lno, $llno];
}
while ($line =~ /\b\w*gi\w*\b/ig) {
my $m = my $m_mod = $&;
$m_mod = lc $m_mod;
$m_mod =~ s/zzu/zu/ig;
$m_mod =~ s/gi/ji/ig;
my $m3 = substr $m_mod, 0, $substr;
push @{$match{$m3}{gi}}, [$m, $fno, $lno, $llno];
}
$lno++;
}
my @keys = sort keys %match;
for my $key (@keys) {
if (exists $match{$key}{ji} && exists $match{$key}{gi} or exists $match{$key}{zu} && exists $match{$key}{zzu}) {
my $m = $match{$key};
my @yotsu_keys = sort %{$m};
for my $yotsu_key (@yotsu_keys) {
for my $y (@{$m->{$yotsu_key}}) {
printf "%s\t%s\t%s\t%s\t%s\t%s\n", $key, $yotsu_key, @$y;
}
}
}
}
ばあいによって、テキストデータのBOMを落としたり改行コードを直したりする必要はある。対比ではなく全例を見たいときは、if (exists…)のif文をコメントアウトすればよい。