kzhr's diary

ad ponendum

IDSをふくむテクストを一字づつ切り出すperlスクリプト

use strict;
use warnings;
use utf8;

# Split a string into a list of characters, keeping every Ideographic
# Description Sequence (IDS) together as a single list element.
#
# An IDS starts with an Ideographic Description Character (IDC) followed by
# its operands; an operand may itself be a nested IDS.  The binary IDCs
# (⿰⿱⿴⿵⿶⿷⿸⿹⿺⿻) take two operands, the ternary IDCs (⿲⿳) take three.
#
# Argument: the text to split (an undefined value is treated as '').
# Returns:  the list of pieces, in order.  If the text ends in the middle of
#           an IDS, the incomplete sequence is returned as the last element
#           rather than being dropped.
sub ids_split {
	my $text = shift;
	$text = '' unless defined $text;

	my @split;
	my $buffer_text = '';
	# Number of characters (including the current one) still needed to
	# complete the IDS being collected; 0 means we are outside any IDS.
	my $buffer_num = 0;

	for my $char (split //, $text) {
		my $operands = $char =~ /[⿰⿱⿴⿵⿶⿷⿸⿹⿺⿻]/ ? 2
		             : $char =~ /[⿲⿳]/                 ? 3
		             :                                    0;

		if ($operands) {
			# A top-level IDC needs a slot for itself plus its operands.
			# A nested IDC fills an operand slot that is already counted,
			# so it only adds the slots for its own operands.
			$buffer_num += $buffer_num ? $operands : $operands + 1;
		}

		if ($buffer_num) {
			$buffer_text .= $char;
			$buffer_num -= 1;
			next if $buffer_num;
			push @split, $buffer_text;
			$buffer_text = '';
		}
		else {
			push @split, $char;
		}
	}

	# Do not silently lose a truncated IDS at the end of the text.
	push @split, $buffer_text if $buffer_num;

	return @split;
}

1;