#!/iw/perl/current/bin/perl
#
# file: edgar2html
# desc: convert SEC EDGAR (edgardoc DTD format) XML documents to HTML
#
# limit scope...keep this first
# Everything that follows is compiled into the EDGAR::html package instead
# of main.
package EDGAR::html;
# Shell re-exec idiom: the trailing "if 0" means perl itself never runs this
# statement.  A Bourne-style shell that is handed this file, however, would
# read the first line as an "eval" command and exec the perl interpreter
# (with -s) on this script, forwarding the original arguments.
# NOTE(review): the package line above precedes the idiom, so a shell would
# try to run "package" as a command first -- confirm whether the shell
# fallback is still relied on.
eval 'exec /iw/perl/current/bin/perl -s $0 ${1+"$@"}'
if 0;
use strict;
use vars qw($VERSION $RELEASE);
#
# current module version
#
# Revision keyword string; the release number is taken from the digits after
# the final '#'.  NOTE(review): this looks like a keyword expanded by the
# revision control system -- keep the line verbatim.
my $Id =<<'EoI';
# $Id: //depot/isms/skulker/edgar/1.2.4/bin/edgar2html_NF#1 $
EoI
#
# $VERSION and $RELEASE are declared as package variables by "use vars", so
# assign to those package variables.  Declaring them again with "my" would
# create separate lexicals and leave the package variables undefined for
# anything that looks them up by package name.
my ($revision) = $Id =~ /^# \$Id: .*#(\d+)/;
# fall back to release 0 when the keyword string carries no revision number
$RELEASE = sprintf("%d", defined($revision) ? $revision : 0);
$VERSION = "1.2.2";
# Compile-time setup: extend @INC with the local EDGAR library directory and
# load every module/library the rest of the script relies on.
BEGIN {
    # installation root; the IW environment variable overrides the default
    my $iw_root = ($ENV{IW}) ? $ENV{IW} : "/iw";
    my $edgar_root = "$iw_root/skulker/edgar/current";
    # where we find our local libraries
    my($libdir) = "$edgar_root/lib";
    if ( -d $libdir) {
        unshift(@INC, $libdir);
    }
    # The modules are loaded with require + import rather than "use": a "use"
    # nested in this block would be executed while the block is still being
    # compiled, i.e. before the unshift above has run, so the local library
    # directory would never be searched for them.
    # HTML template support
    require HTML::Template;
    HTML::Template->import();
    # text to HTML conversion
    require HTML::FromText;
    HTML::FromText->import();
    # generic edgar support data
    require 'edgar-lib.pl';
    # generic edgar utility support
    require 'edgar-util.pl';
    # edgar HTML support
    require 'edgar-html.pl';
    # edgar XML support
    require 'edgar-xml.pl';
}
# we want perl 5.004 or later
require 5.004;
# who am i? (script name without its directory part; used in messages and
# in log/marker file names)
my $prog;
($prog = $0) =~ s#.*/##;
# for processing command line options
use Getopt::Std;
# process command line options, if any; an unknown option prints the usage
my %opt;
getopts('D:Fb:h:V:vH', \%opt) || &usage($prog);
# The purely informational options are handled first so that asking for the
# version or the help text never creates directories as a side effect.
# print the version
if ($opt{'v'}) {
    print &version(), "\n";
    exit;
}
# print the usage
&usage($prog) if $opt{'H'};
# debug mode?
my $debug = defined($opt{'D'}) ? $opt{'D'} : 0;
# force HTML generation?
my $force = defined($opt{'F'});
# which template version to use? (defaults to 2)
my $tmplversion = (defined($opt{'V'})) ? $opt{'V'} : 2;
# EDGAR base/root directory (created if needed)
my $basedir = defined($opt{'b'}) ? "$opt{'b'}" : $EDGAR::lib::edgar_data;
EDGAR::util::makepath($basedir, 0775);
# where to place HTML submissions (created if needed)
my $htmldir = defined($opt{'h'}) ? "$opt{'h'}" : "$basedir/html";
EDGAR::util::makepath($htmldir, 0775);
# HTML template file: the version 2 template when -V is 2, otherwise the
# original template
my $htmltmpl = $EDGAR::HTML::TMPL;
if ($tmplversion == 2) {
    $htmltmpl = $EDGAR::HTML::TMPLV2;
}
# Print the command-line synopsis and option summary on STDOUT, then end
# the program.
#   $prog - program name shown in the synopsis line
# Never returns to the caller: finishes with a plain exit.
sub usage {
    my ($prog) = @_;
    my @help_lines = (
        "usage: $prog [-D -F -b -h -V -v -H] input_file\n",
        " -D : debug mode, 0-9\n",
        " -F : force HTML generation\n",
        " -b : base dir EDGAR base/root directory\n",
        " -h : html base dir where to place HTML submission\n",
        " -V : template version, version 1 or 2\n",
        " -v : version, print out the version number and exit\n",
        " -H : help, print out this message and exit\n",
        "\n",
    );
    foreach my $line (@help_lines) {
        print $line;
    }
    exit;
}
# set up stuff for run log and debugging
use Logger;
# run logging is only switched on for debug levels above 8
my $do_logging = 0;
$do_logging = 1 if ($debug > 8);
$SIG{'ALRM'} = \&CATCH_SIGALRM;
$SIG{'INT'} = \&CATCH_SIGINT;
$logger::DEBUG = Logger->new();
$logger::DEBUG->setupLogger('-base-dir' => "/tmp", '-base-filename' => $prog,
    '-do-logging' => $do_logging);
# $file stays a file-level lexical because get_filename() refers to it by
# name; $pid is not used by the code visible in this file
my($file, $pid);
# per-process marker file, created under /tmp while logging is active
my $prog_pid = $prog . "-" . $$ . ".pid";
my $pid_marker = "/tmp/$prog_pid";
# and we're off...
foreach $file (@ARGV) {
    unless ( -f $file ) {
        print STDERR "$prog: unable to read $file: $!\n"
            if ($debug > 1);
        next;
    }
    # accession number: the file name without directory and .xml suffix
    my $accno = $file;
    $accno =~ s|^.*/||;
    $accno =~ s|\.xml$||;
    print STDERR "$file"
        if ($debug);
    # get output filename
    my $ofile = get_filename($accno);
    # get_filename() returns nothing when no output path can be derived
    # (it reports this itself in debug mode); without this guard the
    # document would still be parsed and the write to an undefined file
    # name would abort the whole run
    next unless (defined $ofile);
    # skip if it already exists ... for now
    if (-f $ofile && ! $force) {
        print STDERR " ... Skipping!\n"
            if ($debug);
        next;
    }
    print STDERR "\n"
        if ($debug);
    if ($do_logging && (! -e $pid_marker)) {
        # create the empty marker file directly instead of spawning a
        # shell to run touch
        if (open(my $marker, '>>', $pid_marker)) {
            close($marker);
        }
        else {
            print STDERR "$prog: unable to create $pid_marker: $!\n";
        }
        $logger::DEBUG->logDateTime();
    }
    # parse EDGAR XML format doc (edgardoc.dtd format)
    my %edgardoc = EDGAR::XML::parsefile($file);
    $logger::DEBUG->logFileInfo('-file-name' => $file,
        '-accno' => $edgardoc{'submission'}{'accession-number'},
        '-type' => $edgardoc{'submission'}{'type'});
    # generate HTML format edgardoc
    my $html = &EDGAR::HTML::toHTML(\%edgardoc, $htmltmpl,
        $tmplversion);
    # spit out the HTML
    &output_HTML($ofile, $html);
    # flush this document's log entries and reset the logger for the next one
    $logger::DEBUG->outputLogger();
    $logger::DEBUG->reInit();
}
$logger::DEBUG->shutdownLogger();
# remove the marker file without going through a shell
unlink($pid_marker) if (-e $pid_marker);
# c'ya
exit 0;
#
# generate HTML output
#
#   $ofile - path of the file to (over)write
#   $html  - HTML text; a trailing newline is appended when written
#
# Dies with a "$prog: error writing ..." message when the file cannot be
# opened, written or closed.  Returns 0 on success.
#
sub output_HTML {
    my ($ofile, $html) = @_;
    print STDERR " ** DEBUG1: ofile: ($ofile) **\n"
        if ($debug > 1);
    # three-argument open with a lexical handle (needs perl 5.6 or later):
    # the file name can never be misread as part of the open mode, and the
    # handle cannot collide with another package-level HTML handle
    open(my $out, '>', $ofile)
        or die "$prog: error writing $ofile: $!\n";
    print {$out} $html, "\n"
        or die "$prog: error writing $ofile: $!\n";
    # buffered write errors (e.g. a full disk) only show up at close time
    close($out)
        or die "$prog: error writing $ofile: $!\n";
    return 0;
}
#
# get output filename
#
#   $accno - accession number of the submission
#
# Returns the name of the HTML file to write for that submission, or
# nothing when no path can be derived from the accession number.
# In normal mode the target directory below $htmldir is created as needed;
# with debug level above 1 the file is named "debug.<accno>.html" without a
# directory part and no directory is created.
#
sub get_filename {
    my $accno = shift;
    # get std path from accession number
    my $path = &EDGAR::util::accno2path($accno);
    # test data uses all zero CIK
    unless ($path) {
        # Report the accession number: the loop variable of the caller's
        # foreach is not visible from inside a named sub, so interpolating
        # the file-level $file here always produced an empty name.
        print STDERR " Skipping test submission: $accno\n"
            if ($debug);
        return;
    }
    # debug runs write next to the current directory, not into the tree
    if ($debug > 1) {
        return "debug.$accno.html";
    }
    my $htmlpath = "$htmldir/$path";
    # build the path if it doesn't exist
    EDGAR::util::makepath($htmlpath, 0775);
    return "$htmlpath/$accno.html";
}
# SIGALRM handler: record the event in the run log, flush and reset the
# logger, then send signal 1 to the pid the logger has saved.
sub CATCH_SIGALRM {
    my $pid = $logger::DEBUG->savedPid();
    print STDERR "\n\nCaught Interrupt SIGALRM, killing pid [$pid]\n";
    $logger::DEBUG->writeMessage('-message' =>
        "Caught Interrupt SIGALRM, killing pid [$pid]");
    $logger::DEBUG->haltOff();
    $logger::DEBUG->outputLogger();
    $logger::DEBUG->reInit();
    # Only signal when a pid was actually saved: an undefined or zero pid
    # would make kill address the whole process group, this script included.
    kill 1, $pid if ($pid);
}
# SIGINT handler: note the interrupt in the run log, flush and shut down the
# logger, remove this process's marker file from /tmp and abort the program.
sub CATCH_SIGINT {
    $logger::DEBUG->writeMessage('-message' => "Caught Interrupt SIGINT");
    $logger::DEBUG->outputLogger();
    $logger::DEBUG->shutdownLogger();
    # remove the marker with unlink rather than forking a shell for "rm -f"
    # from inside a signal handler; a failure is ignored, as before
    unlink("/tmp/$prog_pid") if (-e "/tmp/$prog_pid");
    die "\n\nCaught Interrupt SIGINT\n\n";
}
# Build the version string "<VERSION>r<RELEASE>"; when the code is compiled
# into a package other than main, the package name and a space are put in
# front of it.
sub version {
    my $label = join("r", $VERSION, $RELEASE);
    return $label if (__PACKAGE__ eq 'main');
    return __PACKAGE__ . " " . $label;
}