#
# file: edgar-forms.pl
# desc: parsed-edgar forms support
#
# limit scope
package EDGAR::Forms;

# Compile-time setup: put the local EDGAR library directory on @INC (when
# it exists) and load the shared utility routines.
BEGIN {
    # installation root comes from $ENV{IW} when that is set to a true
    # value, otherwise "/iw"
    my $iw_root    = $ENV{IW} || "/iw";
    my $edgar_root = "$iw_root/skulker/edgar/current";

    # where we find our local libraries
    my $libdir = "$edgar_root/lib";
    unshift(@INC, $libdir) if -d $libdir;

    # general EDGAR utility support routines
    require 'edgardoc-util.pl';
}

# debug verbosity; the parsers below print traces when this is > 0 and
# dump intermediate sections/tables when it is > 5
$debug = 0;

# map form type to parsing function (looked up by name in parse())
%formType = (
    '144'      => 'edgarParse144',
    '3'        => 'edgarParse3',
    '4'        => 'edgarParse4',
    '5'        => 'edgarParse5',
    'SC 13D'   => 'edgarParseSC13D',
    'SC 13D/A' => 'edgarParseSC13D',
    'SC 13G'   => 'edgarParseSC13G',
    'SC 13G/A' => 'edgarParseSC13G',
);

# map form type to IW class; every supported form currently belongs to
# the same class
%form2class = map { $_ => 'Beneficial Ownership' }
    ('144', '3', '4', '5', 'SC 13D', 'SC 13D/A', 'SC 13G', 'SC 13G/A');

# map form type to HTML generator function (does this belong here? - bjb)
# these are for html parsing routines called in edgar-html.pl
%htmlType = (
    (map { $_ => 'edgarBeneficial1' } ('144', '3', '4', '5')),
    (map { $_ => 'edgarBeneficial2' }
        ('SC 13D', 'SC 13D/A', 'SC 13G', 'SC 13G/A')),
);
#
# parse specified EDGAR form type
#
# Looks up the handler registered for the form type in %formType and calls
# it with the document text.
#
# params:  $type - EDGAR form type string, used as the key into %formType
#          $text - document text, handed unchanged to the handler
# returns: the handler's (scalar) return value, or "" when $type is
#          undefined or has no handler registered
#
sub parse {
    my ($type, $text) = @_;

    # ignore document types we don't handle yet - bjb
    return "" unless defined $type;

    # get the name of the subroutine that corresponds to the type of
    # document; kept lexical so it cannot leak into (or be clobbered
    # through) the package namespace between calls
    my $parseFunc = $formType{$type};
    return "" unless $parseFunc;

    # handlers are registered by name, so this is a symbolic call; say so
    # explicitly so it keeps working if the file is ever put under strict
    no strict 'refs';

    # call the handler in scalar context and hand back the XML encoded
    # parsed-edgar data ... for now
    my $xml = &$parseFunc($text);
    return $xml;
}
#
# pull main sections out for forms 3,4,5
#
# Cuts the form text into five pieces using the fixed captions that appear
# in the patterns below ("1. Name and Address", "TABLE I"/"TABLE 1",
# "TABLE II", "Explanation of Responses").
#
# params:  the form text (first argument)
# returns: the list (header, initial questions, table I, table II, footer);
#          a piece whose pattern does not match comes back undefined
#
sub getSectionsFor3_4_5 {
    my ($formText) = @_;

    # some files have the title info in a table, get the tags out
    # NOTE(review): both patterns below contain literal line breaks and no
    # /x flag; it looks like markup may have been lost from them -- confirm
    # against a known-good copy before touching them.
    if ($formText =~ /
.*INITIAL STATEMENT OF BENEFICIAL OWNERSHIP OF SECURITIES.*<\/TABLE>/si) {
        $formText =~ s@
(.*INITIAL STATEMENT OF BENEFICIAL OWNERSHIP OF SECURITIES.*?)
@$1@si;
    }

    # everything in front of the first "1. Name and Address" caption
    my ($headerPart) = $formText =~ /(.*?)1.\s*Name and Address/gis;

    # from that caption up to the start of table I
    my ($questionsPart) = $formText =~ /.*?(\|*?\s*?1.\s*Name and Address.*?)TABLE\s*(1|I)/gis;

    # table I: from its caption up to the "TABLE II" caption
    my ($firstTable) = $formText =~ /.*?(TABLE (I|1).*?)TABLE II.*/gis;

    # table II: from its caption up to "Explanation of Responses"
    my ($secondTable) = $formText =~ /.*?(TABLE II.*)Explanation of Responses.*/gis;

    # footer: whatever follows "Explanation of Responses:"
    my ($footerPart) = $formText =~ /.*Explanation of Responses:(.*)/is;

    return($headerPart, $questionsPart, $firstTable, $secondTable, $footerPart);
}
#
# EDGAR document type 3
#
# Parses the text of a form 3 filing and assembles the output document in
# $textOut.
#
# params:  the form text (first argument); it is passed through
#          EDGAR::XML::decode and EDGARdoc::util::fixText before matching
# returns: $textOut passed through EDGAR::XML::encode.  Returns "" straight
#          away (no encode) when first name, last name or address is false.
#
# NOTE(review): $header, $initialQuestions, $firstName .. $jointFiling,
# the $table?Rows/$table?Cols/@table?Arr variables, $rowCnt/$colCnt and
# $month/$day/$year are not declared with my() in this sub, so they are
# package variables -- confirm nothing depends on that before scoping them.
# NOTE(review): many statements below append only "\n" (or "") to $textOut
# under comments that name an element (HEADER, issuerName, price, ...); it
# looks like markup inside those string literals has been lost -- confirm
# against a known-good copy of this file.
#
sub edgarParse3
{
my($initialText) = shift;
my($text, $textOut, $idInfo, $addressInfo, $issuerName);
my($eventMonth, $eventDay, $eventYear, $amendMonth, $amendDay,
$amendYear, $director, $owner, $officer, $other, $relationAddendum);
my($table1, $table2, $footer, $signature, $date);
my($notSubjectToSection16, $form3HoldingsReported, $form4TransactionsReported);
# cleared below if a required filer field has no word characters
my($requiredFields) = 1;
print "\n\n--- BEGIN PARSE TYPE 3 ---\n\n" if $debug > 0;
# normalise the text, then cut it into its five main sections
$initialText = &EDGAR::XML::decode($initialText);
$initialText = &EDGARdoc::util::fixText($initialText);
($header, $initialQuestions, $table1, $table2, $footer) =
getSectionsFor3_4_5($initialText);
# dump the raw sections when debugging verbosely
if ($debug > 5) {
print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$header\n";
print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$initialQuestions\n";
print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$table1\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$table2\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$footer\n";
print "~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
}
#------------------------------------------------------------------------
# HEADER PORTION
#------------------------------------------------------------------------
# get whether it's no longer subject to section 16
($notSubjectToSection16, $form3HoldingsReported,
$form4TransactionsReported) = &EDGARdoc::util::getFormHeaderInfo($header);
#------------------------------------------------------------------------
# INITIAL QUESTIONS PORTION
#------------------------------------------------------------------------
# get elements from the initial questions section
($firstName, $middleName, $lastName, $address, $city, $state, $zipCode,
$issuerName, $issuerTicker, $idInfo, $eventMonth, $eventDay, $eventYear,
$amendMonth, $amendDay, $amendYear, $director, $owner, $officer, $other,
$relationAddendum, $indFiling, $jointFiling) =
EDGARdoc::util::getFormInitialQuestionsInfo($initialQuestions);
# abort if we are missing required field(s)
unless ($firstName && $lastName && $address) {
warn "EDGAR::Forms::parse3: missing required field(s)...aborting!\n";
return "";
}
#------------------------------------------------------------------------
# TABLE I PORTION
#------------------------------------------------------------------------
# if we still have the header
if ($table1 =~ /INITIAL STATEMENT OF.*BENEFICIAL OWNERSHIP/sig) {
$table1 =~ s/.*?(TABLE I.*)/
$1/sig;
}
# we don't have TABLE I, we have TABLE II
# (re-extract table I from the full text in that case)
if ($table1 =~ /TABLE II -+ Derivative Sec\w+s Beneficially Owned/si) {
($table1) = $initialText =~ /.*?(TABLE I.*)TABLE II.*/sig;
}
# get rid of pipes and underscores in data
$table1 =~ s/-\|-/---/g;
$table1 =~ s/\|/ /g;
$table1 =~ s/_/ /g;
# remove the table header, in case table tags were put in wrong spot
$table1 =~ s/.*TABLE (I|1) -+ Non-Derivative Securities.*?Owned.*?\n/
/si;
# remove extra footnote data for Table I
$table1 =~ s/\n\s*Reminder:\s+Report.*//si;
# replace blank lines with --- lines
$table1 =~ s/\n\s*\n/\n------------------------------------------\n/gs;
if ($debug > 5) {
print "\n\n===================== TABLE I SENT =======================\n";
print $table1;
print "\n===================== TABLE I SENT =======================\n\n\n";
}
# parseTable hands back a row count, a column count and the cell data,
# which is indexed below as $table1Arr[row][column]
($table1Rows, $table1Cols, @table1Arr) = &EDGARdoc::util::parseTable($table1);
if ($debug > 5) {
print "rows: $table1Rows\n";
print "columns: $table1Cols\n";
for ($rowCnt = 0; $rowCnt < $table1Rows; $rowCnt++) {
for ($colCnt = 0; $colCnt < $table1Cols; $colCnt++) {
print "item R $rowCnt, C $colCnt: $table1Arr[$rowCnt][$colCnt]\n";
}
}
}
#------------------------------------------------------------------------
# TABLE II PORTION
#------------------------------------------------------------------------
# remove the table header, in case table tags were put in wrong spot
$table2 =~ s/TABLE II - Derivative Securities Beneficially Owned.*?\n//i;
# we have TABLE I with TABLE II
# (re-extract table II from the full text and prefix it with a newline)
if ($table2 =~ /TABLE I -+ Non-Derivative Sec\w+s Beneficially Owned/si) {
($table2) = $initialText =~ /.*?(TABLE II.*)/sig;
$table2 = "
" . $table2;
}
# get rid of pipes and underscores in data
$table2 =~ s/-\|-/---/g;
$table2 =~ s/\|/ /g;
$table2 =~ s/_/ /g;
# drop everything from "Explanation of Responses:" onward from Table II
$table2 =~ s/Explanation of Responses:.*//si;
if ($debug > 5) {
print "\n\n===================== TABLE II SENT =======================\n";
print $table2;
print "\n===================== TABLE II SENT =======================\n\n\n";
}
($table2Rows, $table2Cols, @table2Arr) = &EDGARdoc::util::parseTable($table2);
if ($debug > 5) {
print "rows: $table2Rows\n";
print "columns: $table2Cols\n";
for ($rowCnt = 0; $rowCnt < $table2Rows; $rowCnt++) {
for ($colCnt = 0; $colCnt < $table2Cols; $colCnt++) {
print "item R $rowCnt, C $colCnt: $table2Arr[$rowCnt][$colCnt]\n";
}
}
}
#------------------------------------------------------------------------
# FOOTER PORTION
#------------------------------------------------------------------------
# NOTE(review): $signature and $date are only printed in the debug block
# below; nothing later in this sub uses them.
($signature, $date) = EDGARdoc::util::getFormFooterInfo($footer);
if ($debug > 5) {
print "signature: $signature\n";
print "date: $date\n";
}
#------------------------------------------------------------------------
# verify the required fields exist
#------------------------------------------------------------------------
# stricter than the truth test above: each field must contain at least
# one word character
# filer first name
if (! ($firstName =~ /\w+/)) {
$requiredFields = 0;
warn "filer first name is required!\n";
}
# filer last name
if (! ($lastName =~ /\w+/)) {
$requiredFields = 0;
warn "filer last name is required!\n";
}
# filer street
if (! ($address =~ /\w+/)) {
$requiredFields = 0;
warn "filer street address is required!\n";
}
#------------------------------------------------------------------------
# XMLize the data
#------------------------------------------------------------------------
# verify required data is present
if ($requiredFields > 0) {
$textOut .= EDGARdoc::util::getBeneficial1StartXML($amendMonth, $amendYear,
$notSubjectToSection16, $form3HoldingsReported,
$form4TransactionsReported);
# HEADER
$textOut .= "\n";
# issuerName
$textOut .= "\n";
# filingType
# NOTE(review): both branches append the same text as written
if ($jointFiling) {
$textOut .= "\n";
} else {
$textOut .= "\n";
}
$textOut .= "\n";
# filer
if (($lastName =~ /\w+/) && ($firstName =~ /\w+/)) {
$textOut .= &EDGARdoc::util::getNameXML("filer", $firstName, $middleName,
$lastName);
# IRS id #
if ($idInfo =~ /\w+/) {
$textOut .= "". EDGAR::XML::trim($idInfo) ."\n";
}
# relation of reporting person
$textOut .= EDGARdoc::util::getRelationXML($director, $officer, $owner,
$other, $relationAddendum);
# address
$textOut .= "\n";
# postal
$textOut .= &EDGARdoc::util::getPostalXML($address, $city, $state, $zipCode);
$textOut .= "\n";
$textOut .= "\n";
}
$textOut .= &EDGARdoc::util::getDateXML("eventDate", $eventMonth, $eventDay,
$eventYear);
$textOut .= &EDGARdoc::util::getDateXML("originalDate", $amendMonth,
$amendDay, $amendYear);
$textOut .= "\n";
#
# XMLize TABLE I data
#
# row 0 is skipped; columns used: 0 title, 1 amount, 2 ownership form,
# 3 nature of ownership
if ($table1Rows > 1) {
for ($rowCnt = 1; $rowCnt < $table1Rows; $rowCnt++) {
$textOut .= "\n";
# security title
if (($table1Arr[$rowCnt][0] =~ /\w+/) &&
($table1Arr[$rowCnt][0] !~ /Title of Security/)) {
$textOut .= "" . EDGAR::XML::trim($table1Arr[$rowCnt][0]);
$textOut .= "\n";
}
# ownership form
$textOut .= EDGARdoc::util::getOwnershipFormXML($table1Arr[$rowCnt][2]);
# nature of ownership
$textOut .= EDGARdoc::util::getNatureOfOwnershipXML($table1Arr[$rowCnt][3]);
# amount owned
$table1Arr[$rowCnt][1] = EDGARdoc::util::toNumber($table1Arr[$rowCnt][1]);
if ($table1Arr[$rowCnt][1] =~ /\w+/) {
$textOut .= "";
$textOut .= $table1Arr[$rowCnt][1];
$textOut .= "\n";
}
$textOut .= "\n";
}
}
#
# XMLize TABLE II data
#
# rows 0-2 are skipped, as is any row whose first column repeats the
# "Title of Derivative Security" heading
if ($table2Rows > 3) {
for ($rowCnt = 3; $rowCnt < $table2Rows; $rowCnt++) {
if ($table2Arr[$rowCnt][0] !~ /Title of Derivative Security/si) {
$textOut .= "\n";
# security title
$table2Arr[$rowCnt][0] =~ s/-//gs;
if ($table2Arr[$rowCnt][0] =~ /\w+/) {
$textOut .= "";
$textOut .= EDGAR::XML::trim($table2Arr[$rowCnt][0]) . "\n";
}
# exercisable date
$table2Arr[$rowCnt][1] =~ s/-//gs;
if ($table2Arr[$rowCnt][1] =~ /\w+/) {
($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][1]);
$textOut .= &EDGARdoc::util::getDateXML("exercisableDate", $month, $day, $year);
}
# expiration date
if ($table2Arr[$rowCnt][2] =~ /\w+/) {
($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][2]);
$textOut .= &EDGARdoc::util::getDateXML("expirationDate", $month, $day, $year);
}
# underlying security
if ($table2Arr[$rowCnt][3] =~ /\w+/) {
$textOut .= "\n";
# amount of underlying security
$table2Arr[$rowCnt][4] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][4]);
if ($table2Arr[$rowCnt][4] =~ /\w+/) {
$textOut .= " ";
$textOut .= $table2Arr[$rowCnt][4];
$textOut .= "\n";
}
$textOut .= "\n";
}
# price
$table2Arr[$rowCnt][5] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][5]);
if ($table2Arr[$rowCnt][5] =~ /\w+/) {
$textOut .= "\n";
}
# ownership form
$textOut .= EDGARdoc::util::getOwnershipFormXML($table2Arr[$rowCnt][6]);
# nature of ownership
$textOut .= EDGARdoc::util::getNatureOfOwnershipXML($table2Arr[$rowCnt][7]);
$textOut .= "\n";
}
}
}
#
# footer
#
$textOut .= "\n";
$textOut .= "\n";
} else {
# required fields aren't present, return an empty string
$textOut = "";
if ($debug > 0) {
print "\n\nERROR: Required Fields are not present.\n";
}
}
if ($debug > 0) {
print "======================== TEXT OUT ===========================\n";
print $textOut;
print "\n======================== TEXT OUT ===========================\n";
print "\n\n--- END PARSE TYPE 3 ---\n\n";
}
# encode the assembled text; the encoded value is the sub's return value
$textOut = &EDGAR::XML::encode($textOut);
$textOut;
}
#
# EDGAR document type 4
#
# Parses the text of a form 4 filing and assembles the output document in
# $textOut.  The flow mirrors edgarParse3: decode/fix the text, split it
# with getSectionsFor3_4_5, parse the header, initial questions, both
# tables and the footer, then append the pieces to $textOut.
#
# params:  the form text (first argument)
# returns: $textOut passed through EDGAR::XML::encode.  Returns "" straight
#          away (no encode) when first name, last name or address is false.
#
# NOTE(review): as it stands this sub does not have balanced braces -- the
# "if ($table1Rows > 1) {" and "for (...) {" opened in the TABLE I output
# section are never closed before "} else {".  Lines appear to be missing
# from the output section; restore them from a known-good copy.
# NOTE(review): $firstName .. $jointFiling, the $table?Rows/$table?Cols/
# @table?Arr variables, $rowCnt/$colCnt and $month/$day/$year are not
# declared with my() here, so they are package variables.
# NOTE(review): many statements append only "\n" (or "") to $textOut under
# comments that name an element; markup inside those string literals looks
# to have been lost -- confirm.
#
sub edgarParse4
{
my($initialText) = shift;
my($text, $textOut);
# vars for main "chunks" of data
my($header, $initialQuestions, $table1, $table2, $footer);
my($notSubjectToSection16, $form3HoldingsReported,
$form4TransactionsReported);
my($idInfo, $addressInfo, $issuerName, $eventMonth, $eventDay, $eventYear,
$amendMonth, $amendDay, $amendYear, $director, $owner, $officer,
$other, $relationAddendum, $signature, $date);
# cleared below if a required filer field has no word characters
my($requiredFields) = 1;
print "\n\n--- BEGIN PARSE TYPE 4 ---\n\n" if $debug > 0;
# normalise the text, then cut it into its five main sections
$initialText = &EDGAR::XML::decode($initialText);
$initialText = &EDGARdoc::util::fixText($initialText);
($header, $initialQuestions, $table1, $table2, $footer) =
getSectionsFor3_4_5($initialText);
# dump the raw sections when debugging verbosely
if ($debug > 5) {
print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$header\n";
print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$initialQuestions\n";
print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$table1\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$table2\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$footer\n";
print "~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
}
#------------------------------------------------------------------------
# HEADER PORTION
#------------------------------------------------------------------------
# get whether it's no longer subject to section 16
($notSubjectToSection16, $form3HoldingsReported,
$form4TransactionsReported) = &EDGARdoc::util::getFormHeaderInfo($header);
#------------------------------------------------------------------------
# INITIAL QUESTIONS
#------------------------------------------------------------------------
# get elements from the initial questions section
($firstName, $middleName, $lastName, $address, $city, $state, $zipCode,
$issuerName, $issuerTicker, $idInfo, $eventMonth, $eventDay, $eventYear,
$amendMonth, $amendDay, $amendYear, $director, $owner, $officer, $other,
$relationAddendum, $indFiling, $jointFiling) =
EDGARdoc::util::getFormInitialQuestionsInfo($initialQuestions);
# abort if we are missing required field(s)
unless ($firstName && $lastName && $address) {
warn "EDGAR::Forms::parse4: missing required field(s)...aborting!\n";
return "";
}
#------------------------------------------------------------------------
# TABLE I PORTION
#------------------------------------------------------------------------
# if we still have the header
if ($table1 =~ /INITIAL STATEMENT OF.*BENEFICIAL OWNERSHIP/sig) {
$table1 =~ s/.*?(TABLE I.*)/
$1/sig;
}
# we don't have TABLE I, we have TABLE II
# (re-extract table I from the full text in that case)
if ($table1 =~ /TABLE II -+ Derivative Sec\w+s Beneficially Owned/si) {
($table1) = $initialText =~ /.*?(TABLE I.*)TABLE II.*/sig;
}
# get rid of pipes and underscores in data
$table1 =~ s/-\|-/---/g;
$table1 =~ s/\|/ /g;
$table1 =~ s/_/ /g;
# remove the table header, in case table tags were put in wrong spot
$table1 =~ s/.*TABLE (I|1) -+ Non-Derivative Securities.*?Owned.*?\n/
/si;
# remove extra footnote data for Table I
$table1 =~ s/\n\s*Reminder:\s+Report.*//si;
# replace blank lines with --- lines
$table1 =~ s/\n\s*\n/\n------------------------------------------\n/gs;
if ($debug > 5) {
print "\n\n===================== TABLE I SENT =======================\n";
print $table1;
print "\n===================== TABLE I SENT =======================\n\n\n";
}
# parseTable hands back a row count, a column count and the cell data,
# which is indexed below as $table1Arr[row][column]
($table1Rows, $table1Cols, @table1Arr) = &EDGARdoc::util::parseTable($table1);
if ($debug > 5) {
print "rows: $table1Rows\n";
print "columns: $table1Cols\n";
for ($rowCnt = 0; $rowCnt < $table1Rows; $rowCnt++) {
for ($colCnt = 0; $colCnt < $table1Cols; $colCnt++) {
print "item R $rowCnt, C $colCnt: $table1Arr[$rowCnt][$colCnt]\n";
}
}
}
#------------------------------------------------------------------------
# TABLE II PORTION
#------------------------------------------------------------------------
# remove the table header, in case table tags were put in wrong spot
$table2 =~ s/TABLE II - Derivative Securities Beneficially Owned.*?\n//i;
# we have TABLE I with TABLE II
# (re-extract table II from the full text and prefix it with a newline)
if ($table2 =~ /TABLE I -+ Non-Derivative Sec\w+s Beneficially Owned/si) {
($table2) = $initialText =~ /.*?(TABLE II.*)/sig;
$table2 = "
" . $table2;
}
# get rid of pipes and underscores in data
$table2 =~ s/-\|-/---/g;
$table2 =~ s/\|/ /g;
$table2 =~ s/_/ /g;
# drop everything from "Explanation of Responses:" onward from Table II
$table2 =~ s/Explanation of Responses:.*//si;
if ($debug > 5) {
print "\n\n===================== TABLE II SENT =======================\n";
print $table2;
print "\n===================== TABLE II SENT =======================\n\n\n";
}
($table2Rows, $table2Cols, @table2Arr) = &EDGARdoc::util::parseTable($table2);
if ($debug > 5) {
print "rows: $table2Rows\n";
print "columns: $table2Cols\n";
for ($rowCnt = 0; $rowCnt < $table2Rows; $rowCnt++) {
for ($colCnt = 0; $colCnt < $table2Cols; $colCnt++) {
print "item R $rowCnt, C $colCnt: $table2Arr[$rowCnt][$colCnt]\n";
}
}
}
#------------------------------------------------------------------------
# FOOTER PORTION
#------------------------------------------------------------------------
# $date is used further down to build the signatureDate element
($signature, $date) = EDGARdoc::util::getFormFooterInfo($footer);
if ($debug > 5) {
print "signature: $signature\n";
print "date: $date\n";
}
#------------------------------------------------------------------------
# verify the required fields exist
#------------------------------------------------------------------------
# stricter than the truth test above: each field must contain at least
# one word character
# filer first name
if (! ($firstName =~ /\w+/)) {
$requiredFields = 0;
warn "filer first name is required!\n";
}
# filer last name
if (! ($lastName =~ /\w+/)) {
$requiredFields = 0;
warn "filer last name is required!\n";
}
# filer street
if (! ($address =~ /\w+/)) {
$requiredFields = 0;
warn "filer street address is required!\n";
}
#------------------------------------------------------------------------
# XMLize the data
#------------------------------------------------------------------------
# verify required data is present
if ($requiredFields > 0) {
$textOut .= EDGARdoc::util::getBeneficial1StartXML($amendMonth, $amendYear,
$notSubjectToSection16, $form3HoldingsReported,
$form4TransactionsReported);
# HEADER
$textOut .= "\n";
# issuerName
$textOut .= "\n";
# filingType
# NOTE(review): both branches append the same text as written
if ($jointFiling) {
$textOut .= "\n";
} else {
$textOut .= "\n";
}
$textOut .= "\n";
# filer
if (($lastName =~ /\w+/) && ($firstName =~ /\w+/)) {
$textOut .= &EDGARdoc::util::getNameXML("filer", $firstName, $middleName,
$lastName);
# IRS id #
if ($idInfo =~ /\w+/) {
$textOut .= "". EDGAR::XML::trim($idInfo) ."\n";
}
# relation of reporting person
$textOut .= EDGARdoc::util::getRelationXML($director, $officer, $owner,
$other, $relationAddendum);
# address
$textOut .= "\n";
# postal
$textOut .= &EDGARdoc::util::getPostalXML($address, $city, $state, $zipCode);
$textOut .= "\n";
$textOut .= "\n";
}
$textOut .= &EDGARdoc::util::getDateXML("eventDate", $eventMonth, $eventDay,
$eventYear);
$textOut .= &EDGARdoc::util::getDateXML("originalDate", $amendMonth,
$amendDay, $amendYear);
$textOut .= "\n";
#
# XMLize TABLE I data
#
# row 0 is skipped; ownership form / nature of ownership are read from
# columns 8 and 9, the transaction date from column 1
if ($table1Rows > 1) {
for ($rowCnt = 1; $rowCnt < $table1Rows; $rowCnt++) {
$textOut .= "\n";
# security title
if (($table1Arr[$rowCnt][0] =~ /\w+/) &&
($table1Arr[$rowCnt][0] !~ /Title of Security/)) {
$textOut .= "";
$textOut .= EDGAR::XML::trim($table1Arr[$rowCnt][0]) . "\n";
}
# ownership form
$textOut .= EDGARdoc::util::getOwnershipFormXML($table1Arr[$rowCnt][8]);
# nature of ownership
$textOut .= EDGARdoc::util::getNatureOfOwnershipXML($table1Arr[$rowCnt][9]);
# transaction date
if ($table1Arr[$rowCnt][1] =~ /\w+/) {
($month, $day, $year) = &EDGARdoc::util::getDateParts($table1Arr[$rowCnt][1]);
$textOut .= &EDGARdoc::util::getDateXML("transactionDate", $month, $day, $year);
}
if ($table1Arr[$rowCnt][2] =~ /\w+/) {
$textOut .= "\n";
}
# NOTE(review): the number is read from column 3 but stored in (and then
# tested on) column 4 -- confirm which column was intended
$table1Arr[$rowCnt][4] = EDGARdoc::util::toNumber($table1Arr[$rowCnt][3]);
if ($table1Arr[$rowCnt][4] =~ /\w+/) {
$textOut .= "\n";
}
# NOTE(review): from here on the code indexes @table2Arr although it is
# still inside the Table I loop, and several paired statements disagree on
# the column (7 vs 6, 11 vs 12, 4 vs 5).  The end of the Table I loop and
# the start of a Table II loop appear to be missing -- confirm.
# exercisable date
$table2Arr[$rowCnt][7] =~ s/-//gs;
if ($table2Arr[$rowCnt][6] =~ /\w+/) {
($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][7]);
$textOut .= &EDGARdoc::util::getDateXML("exercisableDate", $month, $day, $year);
}
# expiration date
if ($table2Arr[$rowCnt][8] =~ /\w+/) {
($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][8]);
$textOut .= &EDGARdoc::util::getDateXML("expirationDate", $month, $day, $year);
}
# underlying security
if ($table2Arr[$rowCnt][9] =~ /\w+/) {
$textOut .= "\n";
# amount of underlying security
$table2Arr[$rowCnt][10] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][10]);
if ($table2Arr[$rowCnt][10] =~ /\w+/) {
$textOut .= " ";
$textOut .= $table2Arr[$rowCnt][10];
$textOut .= "\n";
}
$textOut .= "\n";
}
# price
$table2Arr[$rowCnt][1] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][1]);
if ($table2Arr[$rowCnt][1] =~ /\w+/) {
$textOut .= "\n";
}
# ownership form
$textOut .= EDGARdoc::util::getOwnershipFormXML($table2Arr[$rowCnt][13]);
# nature of ownership
$textOut .= EDGARdoc::util::getNatureOfOwnershipXML($table2Arr[$rowCnt][14]);
# NOTE(review): converted value goes to column 11, but column 12 (the
# unconverted source) is what gets tested and appended
$table2Arr[$rowCnt][11] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][12]);
if ($table2Arr[$rowCnt][12] =~ /\w+/) {
$textOut .= "";
$textOut .= $table2Arr[$rowCnt][12];
$textOut .= "\n";
}
# transaction date
if ($table2Arr[$rowCnt][2] =~ /\w+/) {
($month, $day, $year) = EDGARdoc::util::getDateParts($table2Arr[$rowCnt][2]);
$textOut .= &EDGARdoc::util::getDateXML("transactionDate", $month, $day, $year);
}
if ($table2Arr[$rowCnt][3] =~ /\w+/) {
$textOut .= "\n";
}
# NOTE(review): converted value goes to column 4, but column 5 is tested
$table2Arr[$rowCnt][4] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][5]);
if ($table2Arr[$rowCnt][5] =~ /\w+/) {
$textOut .= "\n";
}
# signature date, taken from the footer's $date when it yields both a
# month and a year
($month, $day, $year) = EDGARdoc::util::getDateParts($date);
if (($month =~ /\w+/) && ($year =~ /\w+/)) {
$textOut .= EDGARdoc::util::getDateXML("signatureDate", $month, $day, $year);
}
$textOut .= "\n";
$textOut .= "\n";
} else {
# required fields aren't present, return an empty string
$textOut = "";
if ($debug > 0) {
print "\n\nERROR: Required Fields are not present.\n";
}
}
if ($debug > 0) {
print "======================== TEXT OUT ===========================\n";
print $textOut;
print "\n======================== TEXT OUT ===========================\n";
print "\n\n--- END PARSE TYPE 4 ---\n\n";
}
# encode the assembled text; the encoded value is the sub's return value
$textOut = &EDGAR::XML::encode($textOut);
$textOut;
}
#
# EDGAR document type 5
#
# Parses the text of a form 5 filing and assembles the output document in
# $textOut.  The flow mirrors edgarParse3/edgarParse4: decode/fix the
# text, split it with getSectionsFor3_4_5, parse the header, initial
# questions, both tables and the footer, then append the pieces to
# $textOut.
#
# params:  the form text (first argument)
# returns: $textOut passed through EDGAR::XML::encode.  Returns "" straight
#          away (no encode) when first name, last name or address is false.
#
# NOTE(review): as it stands this sub does not have balanced braces -- the
# "if ($table1Rows > 1) {" and "for (...) {" opened in the TABLE I output
# section are never closed before "} else {".  Lines appear to be missing
# from the output section; restore them from a known-good copy.
# NOTE(review): $firstName .. $jointFiling, the $table?Rows/$table?Cols/
# @table?Arr variables, $rowCnt/$colCnt and $month/$day/$year are not
# declared with my() here, so they are package variables.
# NOTE(review): many statements append only "\n" (or "") to $textOut under
# comments that name an element; markup inside those string literals looks
# to have been lost -- confirm.
#
sub edgarParse5
{
my($initialText) = shift;
my($text, $textOut);
# vars for main "chunks" of data
my($header, $initialQuestions, $table1, $table2, $footer);
my($notSubjectToSection16, $form3HoldingsReported,
$form4TransactionsReported);
my($idInfo, $addressInfo, $issuerName, $eventMonth, $eventDay, $eventYear,
$amendMonth, $amendDay, $amendYear, $director, $owner, $officer,
$other, $relationAddendum, $signature, $date);
# cleared below if a required filer field has no word characters
my($requiredFields) = 1;
print "\n\n--- BEGIN PARSE TYPE 5 ---\n\n" if $debug > 0;
# normalise the text, then cut it into its five main sections
$initialText = &EDGAR::XML::decode($initialText);
$initialText = &EDGARdoc::util::fixText($initialText);
($header, $initialQuestions, $table1, $table2, $footer) =
getSectionsFor3_4_5($initialText);
# dump the raw sections when debugging verbosely
if ($debug > 5) {
print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$header\n";
print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$initialQuestions\n";
print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$table1\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$table2\n";
print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
print "$footer\n";
print "~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
}
#------------------------------------------------------------------------
# HEADER PORTION
#------------------------------------------------------------------------
# get whether it's no longer subject to section 16
($notSubjectToSection16, $form3HoldingsReported,
$form4TransactionsReported) = &EDGARdoc::util::getFormHeaderInfo($header);
#------------------------------------------------------------------------
# INITIAL QUESTIONS
#------------------------------------------------------------------------
# get elements from the initial questions section
($firstName, $middleName, $lastName, $address, $city, $state, $zipCode,
$issuerName, $issuerTicker, $idInfo, $eventMonth, $eventDay, $eventYear,
$amendMonth, $amendDay, $amendYear, $director, $owner, $officer, $other,
$relationAddendum, $indFiling, $jointFiling) =
EDGARdoc::util::getFormInitialQuestionsInfo($initialQuestions);
# abort if we are missing required field(s)
unless ($firstName && $lastName && $address) {
warn "EDGAR::Forms::parse5: missing required field(s)...aborting!\n";
return "";
}
#------------------------------------------------------------------------
# TABLE I PORTION
#------------------------------------------------------------------------
# if we still have the header
if ($table1 =~ /INITIAL STATEMENT OF.*BENEFICIAL OWNERSHIP/sig) {
$table1 =~ s/.*?(TABLE I.*)/
$1/sig;
}
# we don't have TABLE I, we have TABLE II
# (re-extract table I from the full text in that case)
if ($table1 =~ /TABLE II -+ Derivative Sec\w+s Beneficially Owned/si) {
($table1) = $initialText =~ /.*?(TABLE I.*)TABLE II.*/sig;
}
# get rid of pipes and underscores in data
$table1 =~ s/-\|-/---/g;
$table1 =~ s/\|/ /g;
$table1 =~ s/_/ /g;
# remove the table header, in case table tags were put in wrong spot
$table1 =~ s/.*TABLE (I|1) -+ Non-Derivative Securities.*?Owned.*?\n/
/si;
# remove extra footnote data for Table I
$table1 =~ s/\n\s*Reminder:\s+Report.*//si;
# replace blank lines with --- lines
$table1 =~ s/\n\s*\n/\n------------------------------------------\n/gs;
if ($debug > 5) {
print "\n\n===================== TABLE I SENT =======================\n";
print $table1;
print "\n===================== TABLE I SENT =======================\n\n\n";
}
# parseTable hands back a row count, a column count and the cell data,
# which is indexed below as $table1Arr[row][column]
($table1Rows, $table1Cols, @table1Arr) = &EDGARdoc::util::parseTable($table1);
if ($debug > 5) {
print "rows: $table1Rows\n";
print "columns: $table1Cols\n";
for ($rowCnt = 0; $rowCnt < $table1Rows; $rowCnt++) {
for ($colCnt = 0; $colCnt < $table1Cols; $colCnt++) {
print "item R $rowCnt, C $colCnt: $table1Arr[$rowCnt][$colCnt]\n";
}
}
}
#------------------------------------------------------------------------
# TABLE II PORTION
#------------------------------------------------------------------------
# remove the table header, in case table tags were put in wrong spot
$table2 =~ s/TABLE II - Derivative Securities Beneficially Owned.*?\n//i;
# we have TABLE I with TABLE II
# (re-extract table II from the full text and prefix it with a newline)
if ($table2 =~ /TABLE I -+ Non-Derivative Sec\w+s Beneficially Owned/si) {
($table2) = $initialText =~ /.*?(TABLE II.*)/sig;
$table2 = "
" . $table2;
}
# get rid of pipes and underscores in data
$table2 =~ s/-\|-/---/g;
$table2 =~ s/\|/ /g;
$table2 =~ s/_/ /g;
# drop everything from "Explanation of Responses:" onward from Table II
$table2 =~ s/Explanation of Responses:.*//si;
if ($debug > 5) {
print "\n\n===================== TABLE II SENT =======================\n";
print $table2;
print "\n===================== TABLE II SENT =======================\n\n\n";
}
($table2Rows, $table2Cols, @table2Arr) = &EDGARdoc::util::parseTable($table2);
if ($debug > 5) {
print "rows: $table2Rows\n";
print "columns: $table2Cols\n";
for ($rowCnt = 0; $rowCnt < $table2Rows; $rowCnt++) {
for ($colCnt = 0; $colCnt < $table2Cols; $colCnt++) {
print "item R $rowCnt, C $colCnt: $table2Arr[$rowCnt][$colCnt]\n";
}
}
}
#------------------------------------------------------------------------
# FOOTER PORTION
#------------------------------------------------------------------------
# $date is used further down to build the signatureDate element
($signature, $date) = EDGARdoc::util::getFormFooterInfo($footer);
if ($debug > 5) {
print "signature: $signature\n";
print "date: $date\n";
}
#------------------------------------------------------------------------
# verify the required fields exist
#------------------------------------------------------------------------
# stricter than the truth test above: each field must contain at least
# one word character
# filer first name
if (! ($firstName =~ /\w+/)) {
$requiredFields = 0;
warn "filer first name is required!\n";
}
# filer last name
if (! ($lastName =~ /\w+/)) {
$requiredFields = 0;
warn "filer last name is required!\n";
}
# filer street
if (! ($address =~ /\w+/)) {
$requiredFields = 0;
warn "filer street address is required!\n";
}
#------------------------------------------------------------------------
# XMLize the data
#------------------------------------------------------------------------
# verify required data is present
if ($requiredFields > 0) {
$textOut .= EDGARdoc::util::getBeneficial1StartXML($amendMonth, $amendYear,
$notSubjectToSection16, $form3HoldingsReported,
$form4TransactionsReported);
# HEADER
$textOut .= "\n";
# issuerName
$textOut .= "\n";
# filingType
# NOTE(review): both branches append the same text as written
if ($jointFiling) {
$textOut .= "\n";
} else {
$textOut .= "\n";
}
$textOut .= "\n";
# filer
if (($lastName =~ /\w+/) && ($firstName =~ /\w+/)) {
$textOut .= &EDGARdoc::util::getNameXML("filer", $firstName, $middleName,
$lastName);
# IRS id #
if ($idInfo =~ /\w+/) {
$textOut .= "". EDGAR::XML::trim($idInfo) ."\n";
}
# relation of reporting person
$textOut .= EDGARdoc::util::getRelationXML($director, $officer, $owner,
$other, $relationAddendum);
# address
$textOut .= "\n";
# postal
$textOut .= &EDGARdoc::util::getPostalXML($address, $city, $state, $zipCode);
$textOut .= "\n";
$textOut .= "\n";
}
$textOut .= &EDGARdoc::util::getDateXML("eventDate", $eventMonth, $eventDay,
$eventYear);
$textOut .= &EDGARdoc::util::getDateXML("originalDate", $amendMonth,
$amendDay, $amendYear);
$textOut .= "\n";
#
# XMLize TABLE I data
#
# NOTE(review): the guard tests for more than one row but the loop starts
# at row 2 (edgarParse3/edgarParse4 start at row 1) -- confirm intended.
# ownership form / nature of ownership are read from columns 7 and 8, the
# transaction date from column 1
if ($table1Rows > 1) {
for ($rowCnt = 2; $rowCnt < $table1Rows; $rowCnt++) {
$textOut .= "\n";
# security title
if (($table1Arr[$rowCnt][0] =~ /\w+/) &&
($table1Arr[$rowCnt][0] !~ /Title of Security/)) {
$textOut .= "";
$textOut .= EDGAR::XML::trim($table1Arr[$rowCnt][0]) . "\n";
}
# ownership form
$textOut .= EDGARdoc::util::getOwnershipFormXML($table1Arr[$rowCnt][7]);
# nature of ownership
$textOut .= EDGARdoc::util::getNatureOfOwnershipXML($table1Arr[$rowCnt][8]);
# transaction date
if ($table1Arr[$rowCnt][1] =~ /\w+/) {
($month, $day, $year) = &EDGARdoc::util::getDateParts($table1Arr[$rowCnt][1]);
$textOut .= &EDGARdoc::util::getDateXML("transactionDate", $month, $day, $year);
}
if ($table1Arr[$rowCnt][2] =~ /\w+/) {
$textOut .= "\n";
}
$table1Arr[$rowCnt][3] = EDGARdoc::util::toNumber($table1Arr[$rowCnt][3]);
if ($table1Arr[$rowCnt][3] =~ /\w+/) {
$textOut .= "\n";
}
# NOTE(review): from here on the code indexes @table2Arr although it is
# still inside the Table I loop.  The end of the Table I loop and the
# start of a Table II loop appear to be missing -- confirm.
# exercisable date
$table2Arr[$rowCnt][6] =~ s/-//gs;
if ($table2Arr[$rowCnt][6] =~ /\w+/) {
($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][6]);
$textOut .= &EDGARdoc::util::getDateXML("exercisableDate", $month, $day, $year);
}
# expiration date
if ($table2Arr[$rowCnt][7] =~ /\w+/) {
($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][7]);
$textOut .= &EDGARdoc::util::getDateXML("expirationDate", $month, $day, $year);
}
# underlying security
if ($table2Arr[$rowCnt][8] =~ /\w+/) {
$textOut .= "\n";
# amount of underlying security
$table2Arr[$rowCnt][9] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][9]);
if ($table2Arr[$rowCnt][9] =~ /\w+/) {
$textOut .= " ";
$textOut .= $table2Arr[$rowCnt][9];
$textOut .= "\n";
}
$textOut .= "\n";
}
# price
$table2Arr[$rowCnt][1] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][1]);
if ($table2Arr[$rowCnt][1] =~ /\w+/) {
$textOut .= "\n";
}
# ownership form
$textOut .= EDGARdoc::util::getOwnershipFormXML($table2Arr[$rowCnt][12]);
# nature of ownership
$textOut .= EDGARdoc::util::getNatureOfOwnershipXML($table2Arr[$rowCnt][13]);
# column 11 is converted with toNumber and appended when non-empty
$table2Arr[$rowCnt][11] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][11]);
if ($table2Arr[$rowCnt][11] =~ /\w+/) {
$textOut .= "";
$textOut .= $table2Arr[$rowCnt][11];
$textOut .= "\n";
}
# transaction date
if ($table2Arr[$rowCnt][2] =~ /\w+/) {
($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][2]);
$textOut .= &EDGARdoc::util::getDateXML("transactionDate", $month, $day, $year);
}
if ($table2Arr[$rowCnt][3] =~ /\w+/) {
$textOut .= "\n";
}
$table2Arr[$rowCnt][4] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][4]);
if ($table2Arr[$rowCnt][4] =~ /\w+/) {
$textOut .= "\n";
}
# signature date, taken from the footer's $date when it yields both a
# month and a year
($month, $day, $year) = &EDGARdoc::util::getDateParts($date);
if (($month =~ /\w+/) && ($year =~ /\w+/)) {
$textOut .= &EDGARdoc::util::getDateXML("signatureDate", $month, $day, $year);
}
$textOut .= "\n";
$textOut .= "\n";
} else {
# required fields aren't present, return an empty string
$textOut = "";
if ($debug > 0) {
print "\n\nERROR: Required Fields are not present.\n";
}
}
if ($debug > 0) {
print "======================== TEXT OUT ===========================\n";
print $textOut;
print "\n======================== TEXT OUT ===========================\n";
print "\n\n--- END PARSE TYPE 5 ---\n\n";
}
# encode the assembled text; the encoded value is the sub's return value
$textOut = &EDGAR::XML::encode($textOut);
$textOut;
}
# parse document of type 144
sub edgarParse144
{
local($text) = @_;
$text = &EDGAR::XML::decode($text);
$text = &EDGARdoc::util::fixText($text);
# Check the format of the header
if($text =~ /1\(a\)\s*NAME OF ISSUER.*\(c\) S.E.C. FILE NO./i) {
$doctype = 1;
} else {
$doctype = 2;
}
# Handle documents whose header matched the first layout ($doctype == 1)
if ($doctype == 1) {
# Start extracting required data
# Processing HEADER
# ($table) = ($text =~ /(<TABLE>.*?<\/TABLE>)/s);
# ($table) = ($table =~ /(<TABLE>.*)1\(d\).*/s);
# $reqdata = EDGARdoc::util::parseTable($table);
($line1) = ($text =~ /1\(a\) NAME OF ISSUER.*S.E.C. FILE NO.\n(.*?-{3,})/si);
$line1 =~ s/-{3,}//;
$line1 =~ s/\n//gsi;
($line1) = ($line1 =~ /\s*(.*)/);
($issuerName, $irsId, $fileNo) = split(/\s{3,}/, $line1);
($line2) = ($text =~ /1\(d\) ADDRESS OF ISSUER.*TELEPHONE NO.\n(.*)2\(a\)/si);
$line2 =~ s/-{3,}//gs;
$line2 =~ s/AREA//gsi;
$line2 =~ s/NUMBER//gsi;
$line2 =~ s/CODE//gsi;
$line2 =~ s/\n//gs;
($line2) = ($line2 =~ /\s*(.*)/);
($street, $city, $state, $zip, $area, $number) = split(/\s{3,}/, $line2);
($line3) = ($text =~ /2\(a\) NAME OF PERSON FOR WHOSE ACCOUNT.*IDENT. NO.(.*?-{3,})/si);
$line3 =~ s/-{3,}//gs;
$line3 =~ s/\n//gs;
($line3) = ($line3 =~ /\s*(.*)/);
($sellerName, $sellerirsid, $relationship, $sellerAddress) = split(/\s{3,}/, $line3);
if ($debug > 1) {
print "-----------------------------------------------------------\n";
print " HEADER \n";
print "-----------------------------------------------------------\n";
print "Issuer Name = $issuerName\n";
print "IRS ID = $irsId\n";
print "SEC File Number = $fileNo\n";
print "Street = $street\n";
print "City = $city\n";
print "State = $state\n";
print "Seller Name = $sellerName\n";
print "Relationship = $relationship\n";
print "Seller Address = $sellerAddress\n";
}
}
# Code to parse the various tables in the form
# Parsing the first table...
($table1) = ($text =~ /2.*\s*\(a\).*(
.*3\s*\(a\).*?<\/TABLE>)/si);
$table1 =~ s/\n*
\n*//;
$table1 =~ s/\n*<\/TABLE>\n*//;
($maxrow, $maxcol, @data) = EDGARdoc::util::parseTable($table1);
# Extract the required data from the 2-dim array
# Extracting security title
$securityTitle = EDGARdoc::util::getTableData('(a)', $maxrow, $maxcol, @data);
# Extracting name of each broker
$brokerName = EDGARdoc::util::getTableData('Each', $maxrow, $maxcol, @data);
# Extracting Number of Shares or other Units to be sold
$numberShares = EDGARdoc::util::getTableData('Number of', $maxrow, $maxcol, @data);
# Extracting Aggregate Market Value
$marketValue = EDGARdoc::util::getTableData('grega', $maxrow, $maxcol, @data);
# Extracting Number of shares or other units outstanding
$numberOutstanding = EDGARdoc::util::getTableData('Outstanding', $maxrow, $maxcol, @data);
# Extracting Approximate Date of Sale
$saleDate = EDGARdoc::util::getTableData('Approximate', $maxrow, $maxcol, @data);
# Extracting Name of each Securities Exchange
$securitiesExchange = EDGARdoc::util::getTableData('Exchange', $maxrow, $maxcol, @data);
# Parsing TABLE - I
($tableI) = ($text =~ /(TABLE I.*?