#!/usr/bin/perl # install in cgi-bin/picknewsgroups # tchrist@mox.perl.com # v1.0; Fri Apr 21 10:34:19 MDT 1995 require 5.0; ############################################## # BEGIN CONFIG SECTION ############################################## $EGREP = 'egrep'; # prefer to use gnu egrep for speed $SORT = 'sort -t. -u +0 -1 '; # maybe +1 -2 if you live in a sad world # run from cron now and then : nntplist newsgroups > $NGFILE #$NGFILE = '/usr/local/lib/news/newsgroups'; $NGFILE = '/tmp/newsgroups'; # I'd rather you chose yourself. :-( ### $HOST = 'mox.perl.com'; $HOST = 'sirius'; ### ($HOST) = gethostbyname(chop($host=`hostname`), $host); ### chop($HOST=`hostname`); ############################################## # END CONFIG SECTION ############################################## if ($HOST) { $HOST = "//$HOST"; } %Groups = (); die "No $NGFILE: $!" unless -f $NGFILE && -r _; if ($ENV{HTTP_USER_AGENT} =~ m#Mozilla/1.1#) { $file_icon = ""; $dir_icon = ""; } if (@ARGV) { $Target = shift; } else { get_request(); if (!($Target = $rqpairs{'newsgroup'})) { ($Target) = each %rqpairs; # top level } } die "Cannot fork: $!" unless defined ($pid = open(INPUT, "-|")); unless ($pid) { if ($Target) { exec $EGREP, "^\Q$Target", $NGFILE; } else { exec "$SORT $NGFILE 2>/dev/null"; } die "exec failed: $!"; } ############################### # Here's the kinda data structure we're going to be building ... ############################### # # %Groups = ( # alt => { # DESC => "useless drivel", # NEXT => { # activism => { # DESC => "Activities for activists", # NEXT => { # d => { # DESC => "A place to discuss issues in alt.activism", # }, # "death-penalty" => { # DESC => "For people opposed to capital punishment", # }, # }, # }, # adoption => { # DESC => "For those involved with or contemplating adoption", # } # }, # }, # comp => { # DESC => "computer stuff", # NEXT => { # lang => { # DESC => "programming languages", # NEXT => { # C => "it's better than a boot to the head", # perl => "welcome to Larry's World", # python => "here be snakes, but not poisonous ones", # tcl => "but for tk, none would bother", # scheme => "job security tips in the ivory tower", # } # } # } # } # ); while () { chomp; # now for the darned tabs in the newsgroups file 1 while s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e; ($ng, $desc) = unpack("A24 A*", $_); if (length($ng) == 24) { $desc =~ s/^(\S+)\s*//; $ng .= $1; } @parts = split /\./, $ng; $last = pop @parts; $gp = \%Groups; for $hier (@parts) { if (!ref $gp->{$hier}{NEXT}) { $gp->{$hier}{NEXT} = {}; } $gp = $gp->{$hier}{NEXT}; } $gp->{$last}{DESC} = $desc; } close(INPUT) || warn "Trouble processing input"; if (!$Target) { read_top_level_descs(); } #hierprint(\%Groups); $gp = \%Groups; # this copies the DESC down a level into a blank key for groups # like comp.lang.c that have their own raison d'etre # apart from holding comp.lang.c.moderated, etc. if ($Target) { $what = "Newsgroups under $Target"; for $hier (split(/\./, $Target)) { if ( $gp->{$hier}{DESC} ) { $gp->{$hier}{NEXT}{""}{DESC} = $gp->{$hier}{DESC}; } $gp = $gp->{$hier}{NEXT}; } promote($gp); } else { $what = "Top Level News Hierarchies"; } html_header($what); if ($Target) { print "

$what

\n"; print "
\n"; } if (ref $gp) { $Target =~ s/\.$//; topprint($gp, $Target); } else { print "Couldn't find anything interesting to print.\n" } print "
\n"; print "\n"; html_trailer(); exit; ###################################### sub topprint { my ($gp, $hier) = @_; if (!$Target) { print "

USENET (Big 7) Hierarchies

\n"; for $group (sort keys %Big7) { $fullname = $group; $desc = $Groups{$group}{DESC}; print <
$dir_icon $fullname.*
$desc xxxYYYxxx delete $Groups{$group}; } print "

Non-USENET Hierarchies

\n"; } for $name (sort keys %$gp) { $desc = $gp->{$name}{DESC}; $kids = $gp->{$name}{NEXT}; if ($desc eq '?') { $desc = ""; } ($fullname = $hier . ($hier && '.' ) . $name) =~ s/\.$//; # the pushed down DESCS are null ^^^^^^^^ if (0 and !$Target) { print <$fullname xxxYYYxxx next; } if ($kids) { print <
$dir_icon $fullname.*
$desc xxxYYYxxx next; } print <
$file_icon $fullname
$desc xxxYYYxxx } } # this isn't used, but if you read in the whole tree, # this will dump it all sub hierprint { my ($gp,$hier) = @_; for $name (sort keys %$gp) { $desc = $gp->{$name}{DESC}; $kids = $gp->{$name}{NEXT}; $fullname = $hier . ($hier && '.' ) . $name; if ($kids) { $width = 2 * $indent - 40; iprint (sprintf("%${width}s %s\n", $fullname, $desc && "<<<$desc>>>")); indent(); hierprint($kids, "$fullname"); undent(); } else { $width = 2 * $indent - 40; iprint (sprintf("%${width}s %s\n", $name, $desc)); } } } sub indent { $indent++ } sub undent { --$indent } sub iprint { print " " x $indent; print @_; } # promote alt.barney.die.die.die all the way up # (actually, there's a bug and i only get the first couple levels) sub promote { my $gp = shift; for $name (keys %$gp) { if ($gp->{$name}{NEXT}) { $children = keys %{$gp->{$name}{NEXT}}; if ($children == 1 && !$gp->{$name}{DESC}) { ($n, $p) = each %{$gp->{$name}{NEXT}}; delete $gp->{$name}; $name = "$name.$n"; %{$gp->{$name}} = %$p; } else { promote($gp->{$name}{NEXT}); } } } } sub read_top_level_descs { for (qw(comp sci rec misc soc talk news)) { $Big7{$_} ++ } local($/) = ''; while () { chomp; s/^(\S+)\s*//; $Groups{$1}{DESC} = $_; } } ##################################################################### # The CGI_HANDLERS deal with basic CGI POST or GET method request # elements such as those delivered by an HTTPD form, i.e. a url # encoded line of "=" separated key=value pairs separated by &'s # Routines: # get_request: reads the request and returns both the raw and # processed version. # url_decode: URL decodes a string or array of strings # html_header: Transmits a HTML header back to the caller # html_trailer: Transmits a HTML trailer back to the caller # Author: # James Tappin: sjt@xun8.sr.bham.ac.uk # School of Physics & Space Research University of Birmingham # Feb 1993. # Copyright & Disclaimer. # This set of routines may be freely distributed, modified and # used, provided this copyright & disclaimer remains intact. # This package is used at your own risk, if it does what you # want, good; if it doesn't, modify it or use something else--but # don't blame me. Support level = negligable (i.e. mail bugs but # not requests for extensions) sub get_request { # Subroutine get_request reads the POST or GET form request from STDIN # into the variable $request, and then splits it into its # name=value pairs in the associative array %rqpairs. # The number of bytes is given in the environment variable # CONTENT_LENGTH which is automatically set by the request generator. # Encoded HEX values and spaces are decoded in the values at this # stage. # $request will contain the RAW request. N.B. spaces and other # special characters are not handler in the name field. if ($ENV{'REQUEST_METHOD'} eq "POST") { read(STDIN, $request, $ENV{'CONTENT_LENGTH'}); } elsif ($ENV{'REQUEST_METHOD'} eq "GET" ) { $request = $ENV{'QUERY_STRING'}; } %rqpairs = &url_decode(split(/[&=]/, $request)); } sub url_decode { # Decode a URL encoded string or array of strings # + -> space # %xx -> character xx foreach (@_) { tr/+/ /; s/%(..)/pack("c",hex($1))/ge; } @_; } sub html_header { # Subroutine html_header sends to Standard Output the necessary # material to form an HHTML header for the document to be # returned, the single argument is the TITLE field. local($title) = @_; print "Content-type: text/html\n\n"; print "\n"; print "$title\n"; print "\n\n"; } sub html_trailer { # subroutine html_trailer sends the trailing material to the HTML # on STDOUT. local($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = gmtime; local($mname) = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[$mon]; local($dname) = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")[$wday]; print "

\nGenerated by: $0
\n"; printf "Date: %02d:%02d:%02d GMT on $dname $mday $mname $year.

\n", $hour, $min, $sec; print "\n"; } __END__ comp Topics of interest to both computer professionals and hobbyists, including topics in computer science, software source, and information on hardware and software systems. sci Discussions marked by special and usually practical knowledge, relating to research in or application of the established sciences. misc Groups addressing themes not easily classified under any of the other headings or which incorporate themes from multiple categories. soc Groups primarily addressing social issues and socializing. talk Groups largely debate-oriented and tending to feature long discussions without resolution and without appreciable amounts of generally useful information. news Groups concerned with the news network and software themselves. rec Groups oriented towards the arts, hobbies and recreational activities. alt Largest collection of groups outside the big-7 hierarchy. Many Usenet sites do not receive some or all of these groups. bionet Groups for topics interesting to biologists called bionet" originating from net.bio.net and currently carried at over a third of the sites participating in the Arbitron readership survey. bit Groups for redistributions of the more popular BitNet LISTSERV mailing lists. biz Groups carried and propagated by sites interested in the world of business products around them -- in particular, computer products and services. This includes product announcements, announcements of fixes and enhancements, product reviews, and postings of demo software. clari Groups gatewayed from commercial news services and other ``official'' sources. eye EYE magazine, based in Toronto, Ontario, Canada, is a free newspaper issued once a week. The scope of articles is greater than just Toronto, so the hierarchy is available on a worldwide basis. gnu gnUSENET (gnUSENET is Not USENET) is a set of newsgroups bi-directionally gated with the Internet mailing lists of the GNU Project of the Free Software Foundation. hepnet HEPnet is a collections of networks interconnecting high-energy and nuclear physics research sites. ieee The IEEE newsgroups concern the IEEE -- the Institute of Electrical and Electronics Engineers. info Collection of mailing lists gatewayed into news at the University of Illinois. The lists are selected based on local interests but have proven popular at a number of sites. k12 K12Net is a collection of conferences devoted to K-12 educational curriculum, language exchanges with native speakers, and classroom-to-classroom projects designed by teachers. relcom Hierarchy of Russian-language newsgroups distributed mostly on the territory of the former Soviet Union (non-CIS countries included). u3b Groups dealing with AT&T 3B{2,5,15,20,4000} computers -- everything except for the UNIX PC/3B1. vmsnet Hierarchy for topics of interest to VAX/VMS users (but not necessarily VMS-specific). boulder Hierarchy for Boulder, Colorado. austin Hierarchy for Austin, Texas. co Hierarchy for Colorado, USA. dfw Hierarchy for Dallas and Fort Worth, Texas. cu Hierarchy of the University of Colorado at Boulder. cu-den Hierarchy of the University of Colorado at Denver. csu Hierarchy of the University of Colorado state system. de Hierarchy for German-language groups. ny Hierarchy for New York (state), USA. nj Hierarchy for New Jersey, USA. sun Sun internal newsgroups. convex Convex internal newsgroups. tx Hierarchy for Texas, USA. ucb Hierarchy of the University of California at Berkeley. ucsd Hierarchy of the University of California at San Diego. ba Hierarchy for the Bay Area in northern California. ncar Hierarchy for the National Center for Atmospheric Research. bu Hierarchy for Boston University. csn Hierarchy for Colorado SuperNet. us Hierarchy for the United States. uk Hierarchy for the United Kingdom. courts Current court cases. humanities Hierarchy for the arts and humanities. to Administrative groups with log info on news transfers.