|
|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: T r
Length: 11141 (0x2b85)
Types: TextFile
Names: »report.awk«
└─⟦a0efdde77⟧ Bits:30001252 EUUGD11 Tape, 1987 Spring Conference Helsinki
└─⟦this⟧ »EUUGD11/euug-87hel/sec1/news/misc/report.awk«
└─⟦9ae75bfbd⟧ Bits:30007242 EUUGD3: Starter Kit
└─⟦95173b3df⟧ »EurOpenD3/news/bnews.2.11/misc.tar.Z«
└─⟦ff4664b96⟧
└─⟦this⟧ »misc/report.awk«
From cbosgd!ucbvax!usenet Mon Oct 13 05:39:17 1986
Received: by beno.CSS.GOV (5.54/5.17)
id AA01253; Mon, 13 Oct 86 05:39:12 EDT
Received: from cbosgd.UUCP by seismo.CSS.GOV (5.54/1.14)
id AA03513; Mon, 13 Oct 86 05:39:11 EDT
Received: by cbosgd.ATT.COM (4.12/UUCP-Project/rel-1.0/06-28-86)
id AA08778; Mon, 13 Oct 86 03:44:14 edt
Received: by ucbvax.Berkeley.EDU (5.53/1.17)
id AA15536; Sun, 12 Oct 86 23:51:25 PDT
Date: Sun, 12 Oct 86 23:51:25 PDT
From: ucbvax!usenet (USENET News Administration)
Message-Id: <8610130651.AA15536@ucbvax.Berkeley.EDU>
To: cbosgd!backbone
Subject: a handy awk script for netnews log reports
Status: R
Since Mark didn't have a copy of this, I will assume that most of the
rest of you don't either, and send it along. The comments should be
explanation enough... If not, ask me.
Erik E. Fair ucbvax!fair fair@ucbarpa.berkeley.edu
-------------------------------------------------------------------------------
# USAGE: awk -f report_awk /usr/lib/news/log
#
# AWK script which eats netnews log files and produces a summary of USENET
# traffic and errors over the period of time that the log was collected.
#
# August 31, 1986
#
# Erik E. Fair <dual!fair>
# Original Author, May 22, 1984
#
# Brad Eacker <onyx!brad>
# Modified to simplify the record processing and to sort the output.
#
# Erik E. Fair <dual!fair>
# Modified to provide information about control messages.
#
# Erik E. Fair <dual!fair>
# Bug in system name extraction fixed. It was assumed that the fourth field
# (system name) always had a dot. local is one that doesn't. Some others
# (including 2.9 sites) don't either.
#
# Earl Wallace <pesnta!earlw>
# The "sent" field was changed from $5 to $6 in 2.10.2 (beta)
# named "newstats" and called with no arguments.
#
# Erik E. Fair <dual!fair>
# Remove support for 2.10.1, revise for 2.10.2 to provide information
# about junked articles, garbled articles, and bad newsgroups
#
# Erik E. Fair <ucbvax!fair>
# Minor bug fix to bad newsgroup reporting, also now counting ``old''
# articles as junked, with counter for number that are `old'.
#
# Erik E. Fair <ucbvax!fair>
# Fix up the domain & local hosts support
#
# Erik E. Fair <ucbvax!fair>
# Fix up the counting of gatewayed material, add counting of "linecount"
# problems. Additional cleanup to make things faster.
#
BEGIN{
	#
	# Site configuration.
	#
	# lprefix is the leading string shared by the names of your own hosts
	# (e.g. ucbarpa, ucbvax, su-score, mit-mc, mit-ai); a posting host
	# whose name starts with it is reported as "local".  You will probably
	# want to change (or add to) this.
	#
	lprefix = "ucb";
	lplen = length(lprefix);
	#
	# If you do bi-directional USENET gatewaying (mailing list <-> newsgroup,
	# with material flowing freely both ways), pseudo is the sys file name
	# used to mail articles to the mailing lists.  Traffic for it is
	# reported under rptname.
	#
	pseudo = "internet";
	rptname = "(GATEWAY)";
	#
	# Top level domain names whose traffic is counted as gatewayed.
	# Each name is listed in upper and lower case; every one maps to
	# rptname.
	#
	ntld = split("ARPA arpa EDU edu GOV gov COM com MIL mil ORG org NET net UK uk DEC dec CSNET csnet BITNET bitnet MAILNET mailnet UUCP uucp OZ oz AU au", tld, " ");
	for (t = 1; t <= ntld; t++)
		domains[tld[t]] = rptname;
	#
	# Sentinel key for the sorted output loops in END: tilde is ASCII 126,
	# so this string compares greater than ordinary system names
	# (don't change this).
	#
	invalid = "~~~~~~";
	#
	# Make sure every per-system table exists and holds the sentinel key.
	#
	accept[invalid] = reject[invalid] = xmited[invalid] = 0;
	control[invalid] = junked[invalid] = neighbor[invalid] = 0;
	#
	# Scalar event counters.
	#
	badgrp = garbled = lcount = 0;
	canfail = candup = insfail = 0;
	old = 0;
}
#
# Records that carry nothing worth reporting are dropped up front:
# empty lines, and records whose fifth field is "from", "make" or
# "Cancelling".
#
length($0) == 0 { next }
$5 == "from" || $5 == "make" || $5 == "Cancelling" { next }
#
# Records that are only tallied.  The END block reports garbled as
# "garbled", canfail as failed cancels, candup as duplicate cancels and
# insfail as "uninstallable".
#
$5 == "Inbound" { garbled += 1; next }
$6 == "cancel" || $6 == "Cancelled" || $6 == "install" {
	if ($6 == "cancel") canfail += 1;
	else if ($6 == "Cancelled") candup += 1;
	else insfail += 1;
	next;
}
#
# Articles sent to remote systems (record layout of 2.10.2 (beta):
# "sent" is field 6 and the destination list starts at field 8).
#
# Each destination has any trailing comma (and whatever follows it)
# removed.  The gateway pseudo-system is counted under rptname; every
# other destination is remembered as a netnews neighbor.
#
$6 == "sent" {
	for (k = 8; k <= NF; k++) {
		dest = $(k);
		cut = index(dest, ",");
		if (cut > 0) dest = substr(dest, 1, cut - 1);
		if (dest != pseudo) neighbor[dest] = 1;
		else dest = rptname;
		xmited[dest]++;
	}
	next;
}
#
# Articles sent to remote systems (record layout of 2.11:
# "sent" is field 5 and the destination list starts at field 7).
#
# Each destination has any trailing comma (and whatever follows it)
# removed.  The gateway pseudo-system is counted under rptname; every
# other destination is remembered as a netnews neighbor.
#
$5 == "sent" {
	for (k = 7; k <= NF; k++) {
		dest = $(k);
		cut = index(dest, ",");
		if (cut > 0) dest = substr(dest, 1, cut - 1);
		if (dest != pseudo) neighbor[dest] = 1;
		else dest = rptname;
		xmited[dest]++;
	}
	next;
}
#
# Work out which system this record is about (field 4), taking into
# account that not everyone believes in domains.  This rule has no
# pattern and no "next": it runs for every record that reaches it and
# leaves hostname and sys set for the rules that follow.
#
{
	#
	# if we get a route addr (we shouldn't, but...), keep only the
	# part after the last "@"
	#
	nroute = split($4, route, "@");
	hostname = route[nroute];
	#
	# first dot-separated label is the system name, last one is the
	# root domain (for a name without dots they are the same string)
	#
	ndoms = split(hostname, doms, ".");
	sys = doms[1];
	domain = doms[ndoms];
	#
	# Only for "posted" records: a host carrying the local prefix is
	# reported as "local"; otherwise a host in a known top level domain
	# is reported under that domain's network name (a `local' posting
	# from such a host is really gatewaying in disguise).
	#
	if ($5 == "posted") {
		if (substr(sys, 1, lplen) == lprefix)
			sys = "local";
		else if (domains[domain])
			sys = domains[domain];
	}
}
#
# Accepted articles: posted here or received from elsewhere.
#
# The article is credited to sys; a system we received from is marked
# as a neighbor.  Field 8 is treated as a comma-separated newsgroup
# list, and each group is tallied under its top-level category (the
# text before the first dot, or the whole name when there is no dot).
#
$5 == "posted" || $5 == "received" {
	accept[sys]++;
	if ($5 != "posted") neighbor[sys] = 1;
	ngroups = split($8, groups, ",");
	for (g = 1; g <= ngroups; g++) {
		top = groups[g];
		dot = index(top, ".");
		if (dot > 0) top = substr(top, 1, dot - 1);
		if (top) newsgcnt[top]++;
	}
	next;
}
# Duplicates are keyed by the full host name, not by sys: the END block
# decides where each one belongs once the whole log has been read.
$5 == "Duplicate" { reject[hostname] += 1; next }
# Junked articles.  Records whose sixth field is "too" are additionally
# counted as `old'.
$6 == "valid" || $6 == "too" {
	junked[sys]++;
	if ($6 == "too") old++;
	next;
}
# Bad newsgroups: field 7 holds the group name with one extra character
# on each side (presumably quotes -- verify against the log format);
# those two characters are dropped before counting.
$5 == "Unknown" {
	ng = substr($7, 2, length($7) - 2);
	badng[ng]++;
	badgrp++;
	next;
}
#
# Articles whose actual line count differs from the count the record
# says was expected.  Field 7 is the expected count (with a trailing
# comma), field 9 the count actually seen.
#
$5 == "linecount" {
	# awk does very strange things with non-numeric characters in
	# numbers, so cut the expected count off at the comma first
	want = $7;
	cut = index(want, ",");
	if (cut > 0) want = substr(want, 1, cut - 1);
	delta = $9 - want;
	lcount++;
	alc_host[sys] = 1;
	neighbor[sys] = 1;
	if (delta >= 0) {
		# as long as expected, or longer
		a_nlong[sys]++;
		a_long[sys] += delta;
		if (a_lmax[sys] < delta) a_lmax[sys] = delta;
	} else {
		# shorter than expected; keep the size of the shortfall
		delta = -delta;
		a_nshort[sys]++;
		a_short[sys] += delta;
		if (a_smax[sys] < delta) a_smax[sys] = delta;
	}
	next;
}
#
# Articles whose actual line count is zero.  These are tallied in the
# "Zero" column of the length-problems table and counted as rejected.
#
$7 == "linecount" {
	lcount++;
	a_zero[sys]++;
	reject[sys]++;
	#
	# The length-problems table in END prints one row per key of
	# alc_host, so the host must be registered here as well; otherwise a
	# system whose only problem is zero-length articles never shows up.
	# Marking it as a neighbor (as the other linecount rule does) keeps
	# its reject count filed under its own name in END.
	#
	alc_host[sys] = 1;
	neighbor[sys] = 1;
	next;
}
#
# Control messages: counted per originating system and per value of
# field 10 (the END block lists those values under "Control
# Invocations").
#
$5 == "Ctl" {
	ctot += 1;
	control[sys] += 1;
	ctlcnt[$(10)] += 1;
	next;
}
#
# Anything that fell through every rule above is echoed unchanged; it is
# probably an error message.  For the report submitted to USENET, run
# the output through   sed -e '1,/^$/d' file | inews
# so that this cruft doesn't get out the door.
#
{ print $0 }
#
# Summarize and print the report
#
END{
	#
	# Special processing for Duplicates: we can't tell whether they came
	# from a netnews neighbor or from the gatewaying activities until the
	# entire log has been processed, so they were stored under the full
	# host name and are re-filed here.
	#
	# The re-filed counts are collected in a scratch table and merged
	# back afterwards.  Creating new elements in an array while a
	# for-in loop is walking it has undefined results, so reject[] only
	# has existing elements overwritten inside the loop.
	#
	for( hostname in reject ) {
		#
		# get the root domain name, and the hostname
		#
		ndoms = split(hostname, doms, ".");
		domain = doms[ndoms];
		sys = doms[1];
		if (! neighbor[sys]) {
			prefix = substr(sys, 1, lplen);
			if (prefix == lprefix) {
				sys = "local";
			} else {
				dom = domains[domain];
				if (dom) sys = dom;
			}
		}
		refiled[sys] += reject[hostname];
		reject[hostname] = 0;
	}
	for( sys in refiled ) {
		reject[sys] += refiled[sys];
	}
	#
	# Column totals; list[] collects every system that gets a row.
	# Systems appear for rejects only when the count is non-zero.
	#
	rtot = 0;
	for( i in reject ) {
		if (reject[i] > 0) {
			list[i] = 1;
			rtot += reject[i];
		}
	}
	atot = 0;
	for( i in accept ) {
		list[i] = 1;
		atot += accept[i];
	}
	xtot = 0;
	for( i in xmited ) {
		list[i] = 1;
		xtot += xmited[i];
	}
	ctot = 0;
	for( i in control ) {
		list[i] = 1;
		ctot += control[i];
	}
	jtot = 0;
	for( i in junked ) {
		list[i] = 1;
		jtot += junked[i];
	}
	#
	# The grand total is accepted plus rejected articles only; control
	# and junked counts are not added in separately.  It is clamped to 1
	# so the percentage divisions below are always defined.
	#
	totarticles = atot + rtot;
	if (totarticles == 0) totarticles = 1;
	printf("\nSystem \tAccept\tReject\tJunked\tXmit to\tControl\t%% total\t%% rejct\n");
	for( ; ; ) {
		# selection sort: pick the smallest name not yet printed;
		# the sentinel (invalid) means nothing is left
		i = invalid;
		for( j in list ) {
			if ( list[j] > 0 && j < i ) i = j;
		}
		if ( i == invalid ) break;
		list[i] = 0;
		#
		# control & junked are counted under accept.
		#
		sitetot = accept[i] + reject[i];
		if (sitetot == 0) sitetot = 1;
		# remembered for the length-problems table further down
		articles[i] = sitetot;
		#
		# What an 'orrible printf spec
		#
		printf("%-14s\t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", i, accept[i], reject[i], junked[i], xmited[i], control[i], (sitetot * 100) / totarticles, (reject[i] * 100) / sitetot);
	}
	printf("\nTOTALS \t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", atot, rtot, jtot, xtot, ctot, 100, (rtot * 100) / totarticles);
	printf("\nTotal Articles processed %d", totarticles);
	if (old) printf(", old %d", old);
	if (garbled) printf(", garbled %d", garbled);
	if (insfail) printf(", uninstallable %d", insfail);
	printf("\n");
	#
	# Control message breakdown; cancel gets its failure counts appended.
	#
	if (ctot) {
		printf("\nControl Invocations\n");
		for( i in ctlcnt ) {
			if (i == "cancel") {
				printf("%-12s %6d", i, ctlcnt[i]);
				if (canfail) printf(", %d failed", canfail);
				if (candup) printf(", %d duplicate", candup);
				printf("\n");
			} else {
				printf("%-12s %6d\n", i, ctlcnt[i]);
			}
		}
	}
	#
	# Line count problems, one row per system registered in alc_host.
	#
	if (lcount) {
		printf("\nReceived Article Length Problems\n");
		printf("System Zero Short Smax Savg Long Lmax Lavg Total %% Tot\n");
		for( i in alc_host ) {
			nlong = a_nlong[i];
			nshort = a_nshort[i];
			# avoid dividing by zero when computing the averages
			if (nlong == 0) nlong = 1;
			if (nshort == 0) nshort = 1;
			lavg = a_long[i] / nlong;
			savg = a_short[i] / nshort;
			sitetot = (a_zero[i] + a_nshort[i] + a_nlong[i]);
			#
			# articles[i] only exists for systems that got a row
			# in the main table.  A system with length problems
			# but no accept/reject records would otherwise cause a
			# fatal division by zero; fall back to the number of
			# problem articles (and to 1 as a last resort).
			#
			sitebase = articles[i];
			if (sitebase == 0) sitebase = sitetot;
			if (sitebase == 0) sitebase = 1;
			printf("%-14s %5d %5d %5d %5d %5d %5d %5d %5d %4d%%\n", i, a_zero[i], a_nshort[i], a_smax[i], savg, a_nlong[i], a_lmax[i], lavg, sitetot, (sitetot * 100) / sitebase);
		}
	}
	#
	# Top-level newsgroup categories, most articles first.
	#
	if (atot) {
		printf("\nNetnews Categories Received\n");
		# widest category name sets the column width
		l = 0;
		for( i in newsgcnt ) {
			if (l < length(i)) l = length(i);
		}
		fmt = sprintf("%%-%ds %%6d\n", l);
		for( ; ; ) {
			# selection sort: largest remaining count; printed
			# entries are zeroed so they are not picked again
			max = 0;
			for( j in newsgcnt ) {
				if (newsgcnt[j] > max) {
					i = j;
					max = newsgcnt[j];
				}
			}
			if (max == 0) break;
			printf(fmt, i, newsgcnt[i]);
			newsgcnt[i] = 0;
		}
	}
	#
	# Bad newsgroup names, in alphabetical order.
	#
	if (badgrp) {
		printf("\nBad Newsgroups Received\n");
		l = 0;
		for( i in badng ) {
			if (l < length(i)) l = length(i);
		}
		fmt = sprintf("%%-%ds %%5d\n", l);
		for( ; ; ) {
			# selection sort: smallest remaining name
			i = invalid;
			for( j in badng ) {
				if (badng[j] > 0 && j < i) i = j;
			}
			if (i == invalid) break;
			printf(fmt, i, badng[i]);
			badng[i] = 0;
		}
	}
}