# # file: edgar-util.pl # desc: general utilities for EDGAR processing # ########################################################################## # Copyright (c) 1994, 1995 Internet Multicasting Service # # The SEC EDGAR Level 1 Dissemination processing software ("software") # was developed by the Internet Multicasting Service and may # be used for academic, research, government, and internal business # purposes without charge. You may not resell this code or include it # in a product that you are selling without prior permission of the # Internet Multicasting Service. # # This software is provided ``as is'', without express or implied # warranty, and with no support nor obligation to assist in its # use, correction, modification or enhancement. We assume no liability # with respect to the infringement of copyrights, trade secrets, or any # patents, and are not responsible for consequential damages. Proper # use of the software is entirely the responsibility of the user. ########################################################################## package EDGAR::util; # # get next consecutive file name based on input file name # - expects 'file01' format, may work with other formats # but could have *unexpected* results. # sub get_next_file { local($file) = shift; local(@tmp) = (); while ( -e $file ) { @tmp = split(/\./, $file); $tmp[$#tmp]++; $file = join('.', @tmp); } return $file; } # # make directory hierarchy as needed # sub makepath { local($path) = shift; local($mode) = shift; local($newpath) = ''; # assumes '/' as path delimiter foreach $dir (split(/\//, $path)) { $newpath .= "$dir/"; if (! -d $newpath) { mkdir($newpath, $mode); } } } # # generate directory path from accession number # sub accno2path { my($accno) = shift; # split ascension number input its components, CIK, submission year, # and sequence number my($cik,$yr,$seqno) = split('-', $accno); # Y2K: bad date things may happen...until the SEC fixes the 2-digit year $yr = ($yr < 70) ? 2000+$yr : 1900+$yr; # output path is: # # .../4-digit year # /digits1-2 of CIK # /digits3-6 of CIK # /CIK (minus leading zeroes) # my($path) = ""; $path .= $yr . "/"; $path .= substr($cik, 0, 2) . "/"; $path .= substr($cik, 2, 4); # remove leading zeroes $cik =~ s/^0+//; # test data uses all zero CIK unless ($cik) { # test CIK of all zeroes...ignore return ""; } $path .= "/" . $cik; $path; } # # test to see if two numbers are "how" close together # sub close2equal { my($a) = shift; my($b) = shift; my($how) = shift; return 1 if ($a == $b); if (abs($a-$b) > $how) { return 0; } else { return 1; } } # # remove trailing and leading spaces # sub trim { my($text) = shift; # remove leading and trailing white space $text =~ s/^\s+|\s+$//g; # remove leading periods, followed by white space $text =~ s/^\.\s+//g; # remove <..> elements $text =~ s/<.*?>//g; $text; } # # encode attribute values properly # sub encode_av { my($foo) = shift; return "\"\"" if (!$foo); # remove trailing and leading spaces $foo = &trim($foo); # avoid double encoding & $foo =~ s/\&/\&/g; # order is important with & substitution -- keep it first $foo =~ s/\&/\&/g; $foo =~ s/'/\'/g; # ' for color syntax $foo =~ s/"/\"/g; # " for color syntax $foo =~ s//\>/g; return "\"$foo\""; } # # prints out what is stored in nested hash str # sub dumpHashDebug { my $hash_ref = shift; my $indent = shift; my $key; $indent = " " if !$indent; foreach $key (keys %{$hash_ref}) { print STDERR $indent, "$key <=> $hash_ref->{$key}\n"; if ($hash_ref->{$key} =~ /HASH/) { &dumpHashDebug($hash_ref->{$key}, $indent . $indent); } elsif ($hash_ref->{$key} =~ /ARRAY/) { &dumpArrayDebug($hash_ref->{$key}, $indent . $indent); } } print STDERR "\n"; } # # prints out what is stored in nested hash str # sub dumpArrayDebug { my $array_ref = shift; my $indent = shift; my $element; $indent = " " if !$indent; print STDERR $indent; foreach $element (@{$array_ref}) { print STDERR "$element|"; if ($element =~ /HASH/) { # &dumpHashDebug($array_ref->{$element}, $indent . $indent); } elsif ($element =~ /ARRAY/) { print STDERR "\n"; &dumpArrayDebug($element, $indent . $indent); } } print STDERR "\n"; } # # convert date from YYYYMMDD to form like 'November 11 1969' # sub convertDate { my $number_date = shift; my %month_hash = ( 1 => 'January', 2 => 'February', 3 => 'March', 4 => 'April', 5 => 'May', 6 => 'June', 7 => 'July', 8 => 'August', 9 => 'September', 10 => 'October', 11 => 'November', 12 => 'December', ); my $year = substr($number_date, 0, 4); my $month = substr($number_date, 4, 2); $month =~ s/^0//; my $mday = substr($number_date, 6, 2); $mday =~ s/^0//; my $lmonth = $month_hash{$month}; if (!$lmonth) { # ERROR but what do I do? $lmonth = $month_hash{1}; } return "$lmonth $mday $year"; } # keep require happy 1; =head1 NAME edgar-util.pl - EDGAR general purpose routines =head1 PACKAGE EDGAR::util =head1 SYNOPSIS require edgar-util.pl; =head1 REQUIRES Perl, version 5.001 or higher. =head1 DESCRIPTION EDGAR general purpose routines. =over 3 =head1 METHODS =head2 makepath =item * creates a directory path with a particular mode. =item example: EDGAR::util::makepath("/usr/local/edgar", "0755"); =head2 accno2path =item * creates a directory path based on the accession number. =item example: EDGAR::util::accno2path("111-1999-01"); =head2 close2equal =item * tests to see if two numbers are "how" close together returns 1 if the first number and second number are within the third number's distance apart. returns 0 otherwise. =item example: EDGAR::util::close2equal(5, 7, 2); returns 1 =back =cut