#!/usr/local/bin/perl -w
#
# cfoq: check fascistly overquoted by tchrist@mox.perl.com
#       (wants perl 5.0 or better; developed under 5.002)
#
# INPUT:  a news article (named file argument, or standard input)
# OUTPUT: if -v, then shows how much it found and where
#
# OPTIONS: -v    for verbose flag
#          -t NN to change tolerance percentage from 50%
#          -m NN for minimum lines that get subject to percentage counting
#                (default 20); otherwise, it just checks for ANY new lines.
#                This way short little 6-line messages with 4 lines of
#                quoting don't get hassled
#
# EXIT: 2 (bad failure) if no new lines
#       1 (failure) if tolerance exceeded
#       0 (success) otherwise

# 5.0 might work, but I developed it under 5.002
require 5.002;

use strict;
use vars qw{ $MINLINES $opt_m $VERBOSE $opt_v $TOLERANCE $opt_t };
use Getopt::Std;

getopts("vt:m:")
    || die "usage: $0 [-v] [-t tolerance] [-m minlines] [input_file]\n";

my (
    $total,         # total number of lines, minus sig and attribution
    $quoted_lines,  # how many lines were quoted
    $percent,       # what percentage this is
    $pcount,        # how many in this paragraph were counted
    $match_part,    # holding space for current match
    $gotsig,        # is this the sig paragraph?
);

$total = $quoted_lines = $pcount = $percent = 0;

# Use defined() rather than || so that an explicit "-m 0" or "-t 0" is
# honored instead of being silently replaced by the default.
$MINLINES  = defined $opt_m ? $opt_m : 20;
$VERBOSE   = $opt_v;
$TOLERANCE = defined $opt_t ? $opt_t : 50;

$/ = '';    # set record reading to paragraph mode

<>;         # consume and discard header of message

while (<>) {
    # strip sig line, remember we found it
    $gotsig = s/^-- \n.*//ms;

    # strip attribution, possibly multiline; record 1 was the header,
    # so record 2 is the first paragraph of the body
    if ($. == 2) { s/\A.*?(<.*?>|\@).*?:\n//s }

    # toss trailing blank lines into one single line
    s/\n+\Z/\n/;

    # now reduce miswrapped lines from idiotic broken PC newsreaders
    # into what they should have been
    s/(>.*)\n\s*([a-zA-Z])/$1 $2/g;

    # count lines in this paragraph
    $total++ while /^./mg;

    if ( /^(>+).*\n\Z/ ) {
        # it is a single line, quoted in the customary fashion
        $quoted_lines++;
        print " 1 line quoted with $1\n" if $VERBOSE;
    } else {
        # otherwise, it's a multiline block, which may be quoted
        # with any leading repeated string that's neither alphanumeric
        # nor whitespace
        while (/^(([^\w\s]+).*\n)(\2.*\n)+/mg) {    # YANETUT
            # count the newlines in the matched run of quoted lines
            $pcount = ($match_part = $&) =~ tr/\n//;
            $quoted_lines += $pcount;
            # pass the quote prefix as an argument, never as part of the
            # format: it is arbitrary punctuation and may contain '%'
            printf "%2d lines quoted with %s\n", $pcount, $2 if $VERBOSE;
        }
    }

    # checked on every path (an if/else instead of an early "next") so that
    # nothing after the signature is ever counted
    last if $gotsig;
}

# An empty body has no new lines at all; bail out before dividing by zero.
if ($total == 0) {
    print "No lines found in message body\n" if $VERBOSE;
    exit(2);
}

$percent = int($quoted_lines / $total * 100);
print "$quoted_lines lines quoted out of $total: $percent%\n" if $VERBOSE;

if ($total == $quoted_lines) {
    print "All $total lines were quoted lines!\n" if $VERBOSE;
    exit(2);
}

if ($percent > $TOLERANCE) {
    if ($total < $MINLINES) {
        print "but since $total is less than $MINLINES lines, that's ok\n"
            if $VERBOSE;
        exit 0;
    } else {
        exit 1;
    }
} else {
    exit 0;
}