#!/bin/perl
use strict;
use Time::HiRes qw(usleep);
use IPC::SysV qw(IPC_CREAT IPC_PRIVATE SEM_UNDO S_IWUSR);
use IPC::Semaphore;
use POSIX ":sys_wait_h";
use Config;
# Platform switch: 1 when perl reports its OS name as "cygwin", 0 otherwise.
# The rest of the script uses it to choose between the Win32 and the
# System V semaphore implementation.
my $CYGWIN = ($Config{"osname"} eq "cygwin") ? 1 : 0;

# Loaded at run time so the module is only needed on Cygwin.
require Win32::Semaphore if $CYGWIN;
# ---- run-wide state --------------------------------------------------------
my $CNTONLY = 0;        # becomes 1 when --count is given on the command line
my $textFileName;       # first non-option argument
my $wordsFileName;      # non-option argument following the text file
my $rootPid = $$;       # pid of the process that started the script
my %childrenPid;        # pid => "dummy" for every forked worker still tracked
my $childrenCnt = 0;    # number of workers currently tracked

# ---- inter-process lock ----------------------------------------------------
# One semaphore is shared by the root process and every forked worker:
# a named Win32 semaphore on Cygwin, a private System V set with a single
# semaphore elsewhere.
my $sem = $CYGWIN
    ? Win32::Semaphore->new(1, 1, "searchwords.pl")
    : IPC::Semaphore->new(IPC_PRIVATE, 1, IPC_CREAT | S_IWUSR);

unless ($sem) {
    print "failed to create ipc sem\n";
    exit 1;
}

# The System V semaphore (index 0) starts at 1, i.e. "unlocked".
$sem->setval(0, 1) unless $CYGWIN;
# Acquire the inter-process lock, blocking until it is available.
# On Cygwin this waits on the Win32 semaphore; elsewhere it decrements
# System V semaphore 0 by one with SEM_UNDO set.
sub ipcEnter{
    return $sem->wait() if $CYGWIN;
    return $sem->op(0, -1, SEM_UNDO);
}
# Release the inter-process lock taken by ipcEnter().
#
# The System V release must carry SEM_UNDO just like the acquire does.
# Each SEM_UNDO acquire adds +1 to this process's undo adjustment; a release
# without the flag never takes it back, so the kernel would add the
# accumulated adjustment to the semaphore when the process exits, raising it
# above 1 and letting several processes hold the lock at once.
sub ipcLeave{
    if ($CYGWIN){
        $sem->release();
    }else{
        $sem->op(0, 1, SEM_UNDO);
    }
}
# Release the lock object at the end of a run.
# The System V semaphore is removed explicitly; nothing is done for the
# Win32 semaphore on Cygwin.
sub cleanup{
    $sem->remove() unless $CYGWIN;
    logd("cleanuped\n");
}
# SIGINT handler.
# Forked workers inherit this handler and simply exit. The root process
# signals every tracked worker, removes the semaphore and exits with 1.
# NOTE(review): ipcEnter() blocks; if the interrupt arrives while the root
# process itself holds the lock this may not return promptly -- confirm.
$SIG{'INT'} = sub {
    exit 1 if $rootPid != $$;

    print "Caught SIG INT...\n";
    ipcEnter();
    # keys() starts a fresh iteration, so a half-finished each() loop in the
    # interrupted code cannot make us skip entries.
    for my $pid (keys %childrenPid) {
        print "kill $pid\n";
        # kill() takes the signal first; with the pid alone nothing is sent.
        kill 'TERM', $pid;
    }
    ipcLeave();
    cleanup();
    print "terminated\n";
    exit 1;
};
# Without arguments, print the usage text and stop.
# The semaphore has already been created at this point, so it is removed
# via cleanup() before exiting; a System V semaphore is not released
# automatically when the process ends.
if (@ARGV == 0){
    print <<'EOT';
Usage:
searchwords.pl [options] [target_text] [words_list_file] [options]
e.g. $ perl searchwords.pl --count target.txt words.txt
words_list_file:
Each word should be separated by \n.
options:
--count
Output count of words only.
EOT
    cleanup();
    exit 1;
}
# Command-line parsing.
# Arguments starting with "-" are options; only --count is recognised and
# other options are ignored. The first remaining argument is the text file,
# any later one is taken as the words file.
for my $arg (@ARGV) {
    if ($arg =~ /^-/) {
        # String comparison: a numeric == treats every non-numeric option
        # as 0 and would switch count mode on for any of them.
        if ($arg eq "--count") {
            $CNTONLY = 1;
        }
    }
    elsif (length($textFileName) == 0) {
        $textFileName = $arg;
    }
    else {
        $wordsFileName = $arg;
    }
}
# Both the text file and the words file are mandatory.
if (length($textFileName) == 0 || length($wordsFileName) == 0){
    print "files should be specified!\n";
    # The semaphore already exists; remove it so it does not outlive us.
    cleanup();
    exit(1);
}
print "text : $textFileName\n";
# Open the word list; the WORDS handle is consumed by the main read loop.
if (!open(WORDS, "<", $wordsFileName)) {
    print "file open error!\n";
    # The semaphore already exists; remove it so it does not outlive us.
    cleanup();
    exit(1);
}
# ---- main loop -------------------------------------------------------------
# Reads the word list, turns each word into a "-e <word>" grep argument and
# hands batches of $WORDS_PER_CHILD words to forkChild(). Once
# $CHILDREN_LIMIT workers are tracked, one waitpid() call is made after each
# word so finished workers free a slot.
my $paramWords;             # grep arguments of the current batch; read by forkChild()
my $CHILDREN_LIMIT = 8;     # tracked workers at which the loop starts waiting
my $WORDS_PER_CHILD = 1;    # words handed to a single worker
my $i = 0;                  # words collected in the current batch
my $wordInfo;               # passed to forkChild(); never assigned in the visible code
while (my $line = <WORDS>) {
    my $word = $line;
    # Strip the line ending, including the CR of CRLF files, so the pattern
    # does not carry a stray "\r".
    $word =~ s/[\r\n]+\z//;
    next if length($word) == 0;

    # The word is later interpolated into a shell command line by
    # forkChild(), hence the backslash escaping.
    # NOTE(review): only space, '#', '(', ')' and tab are escaped; other
    # shell metacharacters in the word list reach the shell unchanged.
    $word =~ s/ /\\ /g;
    $word =~ s/\#/\\\#/g;
    $word =~ s/\(/\\\(/g;
    $word =~ s/\)/\\\)/g;
    $word =~ s/\t/\\t/g;
    $paramWords .= "-e $word ";
    $i++;

    if ($i >= $WORDS_PER_CHILD) {
        forkChild($wordInfo);
        $i = 0;
        $paramWords = "";
    }

    next if $childrenCnt < $CHILDREN_LIMIT;

    # At the limit: block until one worker has finished.
    my $child = waitpid(-1, 0);
    if ($child == -1) {
        # No child left to wait for: the bookkeeping is stale, reset it.
        logd("there are not any children\n");
        ipcEnter();
        %childrenPid = ();
        $childrenCnt = 0;
        ipcLeave();
    }
    elsif (exists $childrenPid{$child}) {
        logd("child $child terminated\n");
        ipcEnter();
        delete $childrenPid{$child};
        $childrenCnt--;
        ipcLeave();
    }
}
close(WORDS);

# Hand over a last, partially filled batch.
if ($i != 0) {
    forkChild($wordInfo);
}
# Fork one worker that greps $textFileName for the patterns currently held in
# $paramWords.
#
# Parameters:
#   $_[0]  optional text appended to the worker's summary line.
#
# In the parent the new pid is recorded in %childrenPid and $childrenCnt is
# incremented, then the sub returns. The child prints the matching lines
# (or, with --count, only counts them), prints a summary line and exits
# with 0. If fork() fails a warning is written to STDERR and the sub returns
# without starting a worker.
sub forkChild{
    my $info = $_[0];

    ipcEnter();
    my $pid = fork();

    if (!defined $pid) {
        # fork() failed and we are still the parent. Falling through to the
        # worker code would run grep here and then exit the whole script.
        my $err = $!;
        ipcLeave();
        warn "failed to fork: $err\n";
        return;
    }

    if ($pid) {
        # Parent: register the worker while the lock is still held.
        $childrenCnt++;
        $childrenPid{$pid} = "dummy";
        ipcLeave();
        logd("forked $pid\n");
        return;
    }

    # ---- child from here on ------------------------------------------------
    # Flush on every print: otherwise the text may stay in this process's
    # buffer and be written after the semaphore has been released.
    $| = 1;

    logd("cat $textFileName | grep -c $paramWords\n");
    my $cnt = 0;
    if ($CNTONLY){
        $cnt = `cat $textFileName | grep -c $paramWords`;
        $cnt =~ s/\n//;
    }else{
        # A single command string is still run through the shell, which the
        # pipeline needs.
        open(my $cmd, "-|", "cat $textFileName | grep $paramWords")
            or die "failed to run grep\n";
        while (my $line = <$cmd>){
            ++$cnt;
            ipcEnter();
            print $line;
            ipcLeave();
        }
        close $cmd;
    }

    # Drop the "-e " markers so the summary lists just the words.
    $paramWords =~ s/-e //g;
    ipcEnter();
    print("words $paramWords count is $cnt");
    if (length($info) != 0){
        print(" : $info\n");
    }else{
        print("\n");
    }
    ipcLeave();
    exit(0);
}
# Final phase: wait for every worker that is still tracked, then remove the
# semaphore.
# $key and $value stay declared at file scope; this rewrite no longer uses
# them itself. NOTE(review): drop them if nothing further down the file
# relies on the names.
my ($key, $value);
for my $childPid (keys %childrenPid) {
    logd("wait to terminate child $childPid\n");
    waitpid($childPid, 0);
}
cleanup();
# Debug logging hook. Intentionally a no-op: the message passed by callers is
# ignored and nothing is returned.
sub logd{
    return;
}