#
# file: edgar-forms.pl
# desc: parsed-edgar forms support
#

# limit scope
package EDGAR::Forms;

BEGIN {
    my $iw_root = ($ENV{IW}) ? $ENV{IW} : "/iw";
    my $edgar_root = "$iw_root/skulker/edgar/current";

    # where we find our local libraries
    my($libdir) = "$edgar_root/lib";
    if ( -d $libdir) {
        unshift(@INC, $libdir);
    }

    # general EDGAR utility support routines
    require 'edgardoc-util.pl';
}

# debug verbosity: the form 3/4/5 parsers print begin/end markers and the
# generated text when > 0, and dump the intermediate sections and parsed
# tables when > 5
$debug = 0;

# map form type to parsing function
%formType = (
    '144'      => 'edgarParse144',
    '3'        => 'edgarParse3',
    '4'        => 'edgarParse4',
    '5'        => 'edgarParse5',
    'SC 13D'   => 'edgarParseSC13D',
    'SC 13D/A' => 'edgarParseSC13D',
    'SC 13G'   => 'edgarParseSC13G',
    'SC 13G/A' => 'edgarParseSC13G',
);

# map form type to IW class
%form2class = (
    '144'      => 'Beneficial Ownership',
    '3'        => 'Beneficial Ownership',
    '4'        => 'Beneficial Ownership',
    '5'        => 'Beneficial Ownership',
    'SC 13D'   => 'Beneficial Ownership',
    'SC 13D/A' => 'Beneficial Ownership',
    'SC 13G'   => 'Beneficial Ownership',
    'SC 13G/A' => 'Beneficial Ownership',
);

# map form type to HTML generator function (does this belong here? - bjb)
# these are for html parsing routines called in edgar-html.pl
%htmlType = (
    '144'      => 'edgarBeneficial1',
    '3'        => 'edgarBeneficial1',
    '4'        => 'edgarBeneficial1',
    '5'        => 'edgarBeneficial1',
    'SC 13D'   => 'edgarBeneficial2',
    'SC 13D/A' => 'edgarBeneficial2',
    'SC 13G'   => 'edgarBeneficial2',
    'SC 13G/A' => 'edgarBeneficial2',
);

#
# parse specified EDGAR form type
#
# Arguments:
#   $type - EDGAR form type string; looked up in %formType
#   $text - document text, handed unchanged to the form-specific parser
#
# Returns the scalar produced by the form-specific parser, or "" when the
# form type is undefined, is not listed in %formType, or is listed but its
# parsing routine is not defined in this package.
#
sub parse {
    my $type = shift;
    my $text = shift;

    # ignore document types we don't handle yet - bjb
    return "" unless defined $type;

    # get the name of the subroutine that corresponds to the type of document
    # (kept lexical so the name does not leak into a package global)
    my $parseFunc = $formType{$type};
    return "" if (! $parseFunc);

    # resolve the name to a code reference up front; a table entry whose
    # routine has not been loaded is treated like an unhandled type
    # instead of dying with "Undefined subroutine"
    my $handler = __PACKAGE__->can($parseFunc);
    if (! $handler) {
        warn "EDGAR::Forms::parse: no parser '$parseFunc' defined for form type '$type'\n";
        return "";
    }

    # call the parser and return the XML encoded parsed-edgar data ... for now
    my $xml = &$handler($text);
    return $xml;
}

#
# pull main sections out for forms 3,4,5
#
# Takes the full form text and returns the list
#   ($header, $initialQuestions, $table1, $table2, $footer)
# where an element is undef when its delimiting text is not found:
#   header           - everything before the first "1. Name and Address"
#   initialQuestions - from "1. Name and Address" (plus any pipes/whitespace
#                      directly in front of it) up to "TABLE 1" / "TABLE I"
#   table1           - from "TABLE I" / "TABLE 1" up to "TABLE II"
#   table2           - from "TABLE II" up to the last "Explanation of Responses"
#   footer           - everything after the last "Explanation of Responses:"
# All matching is case-insensitive and "." also matches newlines.
#
sub getSectionsFor3_4_5 {
    my($initialText) = shift;
    my($header, $initialQuestions, $table1, $table2, $footer);

    # some files have the title info in a table, get the tags out
    if ($initialText =~ /.*INITIAL STATEMENT OF BENEFICIAL OWNERSHIP OF SECURITIES.*<\/TABLE>/si) {
        # NOTE(review): as written this only drops one newline on each side
        # of the title text; no tag text appears in the pattern -- confirm
        # the pattern is intact
        $initialText =~ s@\n(.*INITIAL STATEMENT OF BENEFICIAL OWNERSHIP OF SECURITIES.*?)\n@$1@si;
    }

    # Only the first capture of each match is kept, so the patterns are run
    # once (no /g) rather than collecting every later match as well.
    # NOTE(review): the "." in "1." is unescaped and matches any character;
    # left as-is because filings may not always use a literal period.

    # get header stuff
    ($header) = $initialText =~ /(.*?)1.\s*Name and Address/is;

    # get initial data stuff
    ($initialQuestions) = $initialText =~ /.*?(\|*?\s*?1.\s*Name and Address.*?)TABLE\s*(?:1|I)/is;

    # get table I stuff
    ($table1) = $initialText =~ /.*?(TABLE (?:I|1).*?)TABLE II.*/is;

    # get table II stuff
    ($table2) = $initialText =~ /.*?(TABLE II.*)Explanation of Responses.*/is;

    # get footer stuff
    ($footer) = $initialText =~ /.*Explanation of Responses:(.*)/is;

    return($header, $initialQuestions, $table1, $table2, $footer);
}

#
# EDGAR document type 3
#
# Takes the document text, decodes and normalizes it, splits it with
# getSectionsFor3_4_5 and builds the output text from the pieces.
# Returns "" when the filer first name, last name or address is missing,
# otherwise the result of EDGAR::XML::encode on the generated text.
#
sub edgarParse3 {
    my($initialText) = shift;
    my($text, $textOut, $idInfo, $addressInfo, $issuerName);
    my($eventMonth, $eventDay, $eventYear, $amendMonth, $amendDay, $amendYear,
       $director, $owner, $officer, $other, $relationAddendum);
    # declared lexically, as edgarParse4 and edgarParse5 do, so the section
    # text does not linger in package globals between calls
    my($header, $initialQuestions);
    my($table1, $table2, $footer, $signature, $date);
    my($notSubjectToSection16, $form3HoldingsReported, $form4TransactionsReported);
    my($requiredFields) = 1;

    print "\n\n--- BEGIN PARSE TYPE 3 ---\n\n" if $debug > 0;

    $initialText = &EDGAR::XML::decode($initialText);
    $initialText = &EDGARdoc::util::fixText($initialText);

    ($header, $initialQuestions, $table1, $table2, $footer) = getSectionsFor3_4_5($initialText);

    if ($debug > 5) {
        print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$header\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$initialQuestions\n";
        print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$table1\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$table2\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$footer\n";
        print
"~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
    }

    # Remainder of the Form 3 parser: extract header flags and filer data,
    # clean up and parse both tables, then assemble the output text.
    # NOTE(review): many string literals below are empty or contain only
    # "\n"; element markup may be missing from this copy -- confirm against
    # the upstream file before relying on the generated text.

    #------------------------------------------------------------------------
    # HEADER PORTION
    #------------------------------------------------------------------------

    # get whether it's no longer subject to section 16
    ($notSubjectToSection16, $form3HoldingsReported,
     $form4TransactionsReported) =
         &EDGARdoc::util::getFormHeaderInfo($header);

    #------------------------------------------------------------------------
    # INITIAL QUESTIONS PORTION
    #------------------------------------------------------------------------

    # get elements from the initial questions section
    # (names not listed in this routine's my() declarations, e.g. $firstName
    # and $jointFiling, are package variables)
    ($firstName, $middleName, $lastName, $address, $city, $state, $zipCode,
     $issuerName, $issuerTicker, $idInfo, $eventMonth, $eventDay, $eventYear,
     $amendMonth, $amendDay, $amendYear, $director, $owner, $officer, $other,
     $relationAddendum, $indFiling, $jointFiling) =
         EDGARdoc::util::getFormInitialQuestionsInfo($initialQuestions);

    # abort if we are missing required field(s)
    unless ($firstName && $lastName && $address) {
        warn "EDGAR::Forms::parse3: missing required field(s)...aborting!\n";
        return "";
    }

    #------------------------------------------------------------------------
    # TABLE I PORTION
    #------------------------------------------------------------------------

    # if we still have the header, cut everything before the first "TABLE I"
    if ($table1 =~ /INITIAL STATEMENT OF.*BENEFICIAL OWNERSHIP/sig) {
        $table1 =~ s/.*?(TABLE I.*)/$1/sig;
    }

    # we don't have TABLE I, we have TABLE II: re-extract from the full text
    if ($table1 =~ /TABLE II -+ Derivative Sec\w+s Beneficially Owned/si) {
        ($table1) = $initialText =~ /.*?(TABLE I.*)TABLE II.*/sig;
    }

    # get rid of pipes and underscores in data
    $table1 =~ s/-\|-/---/g;
    $table1 =~ s/\|/ /g;
    $table1 =~ s/_/ /g;

    # remove the table header, in case table tags were put in wrong spot
    $table1 =~ s/.*TABLE (I|1) -+ Non-Derivative Securities.*?Owned.*?\n//si;

    # remove extra footnote data for Table I
    $table1 =~ s/\n\s*Reminder:\s+Report.*//si;

    # replace blank lines with --- lines
    $table1 =~
        s/\n\s*\n/\n------------------------------------------\n/gs;

    if ($debug > 5) {
        print "\n\n===================== TABLE I SENT =======================\n";
        print $table1;
        print "\n===================== TABLE I SENT =======================\n\n\n";
    }

    # parseTable returns the row count, the column count and then the rows;
    # cells are addressed below as $table1Arr[row][column]
    ($table1Rows, $table1Cols, @table1Arr) = &EDGARdoc::util::parseTable($table1);

    if ($debug > 5) {
        print "rows: $table1Rows\n";
        print "columns: $table1Cols\n";
        for ($rowCnt = 0; $rowCnt < $table1Rows; $rowCnt++) {
            for ($colCnt = 0; $colCnt < $table1Cols; $colCnt++) {
                print "item R $rowCnt, C $colCnt: $table1Arr[$rowCnt][$colCnt]\n";
            }
        }
    }

    #------------------------------------------------------------------------
    # TABLE II PORTION
    #------------------------------------------------------------------------

    # remove the table header, in case table tags were put in wrong spot
    $table2 =~ s/TABLE II - Derivative Securities Beneficially Owned.*?\n//i;

    # we have TABLE I with TABLE II: re-extract from the full text
    if ($table2 =~ /TABLE I -+ Non-Derivative Sec\w+s Beneficially Owned/si) {
        ($table2) = $initialText =~ /.*?(TABLE II.*)/sig;
        # NOTE(review): prepending an empty string has no visible effect as
        # written -- confirm nothing was lost from the literal
        $table2 = "" .
            $table2;
    }

    # get rid of pipes and underscores in data
    $table2 =~ s/-\|-/---/g;
    $table2 =~ s/\|/ /g;
    $table2 =~ s/_/ /g;

    # remove extra footnote data for Table II
    $table2 =~ s/Explanation of Responses:.*//si;

    if ($debug > 5) {
        print "\n\n===================== TABLE II SENT =======================\n";
        print $table2;
        print "\n===================== TABLE II SENT =======================\n\n\n";
    }

    ($table2Rows, $table2Cols, @table2Arr) = &EDGARdoc::util::parseTable($table2);

    if ($debug > 5) {
        print "rows: $table2Rows\n";
        print "columns: $table2Cols\n";
        for ($rowCnt = 0; $rowCnt < $table2Rows; $rowCnt++) {
            for ($colCnt = 0; $colCnt < $table2Cols; $colCnt++) {
                print "item R $rowCnt, C $colCnt: $table2Arr[$rowCnt][$colCnt]\n";
            }
        }
    }

    #------------------------------------------------------------------------
    # FOOTER PORTION
    #------------------------------------------------------------------------

    # in this routine $signature and $date are only used by the debug
    # output below
    ($signature, $date) = EDGARdoc::util::getFormFooterInfo($footer);

    if ($debug > 5) {
        print "signature: $signature\n";
        print "date: $date\n";
    }

    #------------------------------------------------------------------------
    # verify the required fields exist
    #------------------------------------------------------------------------
    # (stricter than the earlier truthiness check: each field must contain
    # at least one word character)

    # filer first name
    if (! ($firstName =~ /\w+/)) {
        $requiredFields = 0;
        warn "filer first name is required!\n";
    }

    # filer last name
    if (! ($lastName =~ /\w+/)) {
        $requiredFields = 0;
        warn "filer last name is required!\n";
    }

    # filer street
    if (!
        ($address =~ /\w+/)) {
        $requiredFields = 0;
        warn "filer street address is required!\n";
    }

    #------------------------------------------------------------------------
    # XMLize the data
    #------------------------------------------------------------------------

    # verify required data is present
    if ($requiredFields > 0) {
        $textOut .= EDGARdoc::util::getBeneficial1StartXML($amendMonth, $amendYear, $notSubjectToSection16, $form3HoldingsReported, $form4TransactionsReported);

        # HEADER
        $textOut .= "\n";

        # issuerName
        $textOut .= "\n";

        # filingType
        # NOTE(review): both branches append the same literal as written
        if ($jointFiling) {
            $textOut .= "\n";
        } else {
            $textOut .= "\n";
        }
        $textOut .= "\n";

        # filer
        if (($lastName =~ /\w+/) && ($firstName =~ /\w+/)) {
            $textOut .= &EDGARdoc::util::getNameXML("filer", $firstName, $middleName, $lastName);

            # IRS id #
            if ($idInfo =~ /\w+/) {
                $textOut .= "". EDGAR::XML::trim($idInfo) ."\n";
            }

            # relation of reporting person
            $textOut .= EDGARdoc::util::getRelationXML($director, $officer, $owner, $other, $relationAddendum);

            # address (the literal below spans a physical line break)
            $textOut .= "
\n";
            # postal
            $textOut .= &EDGARdoc::util::getPostalXML($address, $city, $state, $zipCode);
            $textOut .= "
\n";
            $textOut .= "\n";
        }

        $textOut .= &EDGARdoc::util::getDateXML("eventDate", $eventMonth, $eventDay, $eventYear);
        $textOut .= &EDGARdoc::util::getDateXML("originalDate", $amendMonth, $amendDay, $amendYear);
        $textOut .= "
\n";

        #
        # XMLize TABLE I data
        #
        # row 0 is never emitted (presumably the column headings -- confirm
        # against parseTable)
        if ($table1Rows > 1) {
            for ($rowCnt = 1; $rowCnt < $table1Rows; $rowCnt++) {
                $textOut .= "\n";

                # security title (column 0), skipping a repeated heading row
                if (($table1Arr[$rowCnt][0] =~ /\w+/) && ($table1Arr[$rowCnt][0] !~ /Title of Security/)) {
                    $textOut .= "" . EDGAR::XML::trim($table1Arr[$rowCnt][0]);
                    $textOut .= "\n";
                }

                # ownership form (column 2)
                $textOut .= EDGARdoc::util::getOwnershipFormXML($table1Arr[$rowCnt][2]);

                # nature of ownership (column 3)
                $textOut .= EDGARdoc::util::getNatureOfOwnershipXML($table1Arr[$rowCnt][3]);

                # amount owned (column 1), normalized in place by toNumber
                $table1Arr[$rowCnt][1] = EDGARdoc::util::toNumber($table1Arr[$rowCnt][1]);
                if ($table1Arr[$rowCnt][1] =~ /\w+/) {
                    $textOut .= "";
                    $textOut .= $table1Arr[$rowCnt][1];
                    $textOut .= "\n";
                }
                $textOut .= "\n";
            }
        }

        #
        # XMLize TABLE II data
        #
        # rows 0-2 are never emitted (presumably a multi-line heading --
        # confirm against parseTable)
        if ($table2Rows > 3) {
            for ($rowCnt = 3; $rowCnt < $table2Rows; $rowCnt++) {
                if ($table2Arr[$rowCnt][0] !~ /Title of Derivative Security/si) {
                    $textOut .= "\n";

                    # security title (column 0) with dashes removed
                    $table2Arr[$rowCnt][0] =~ s/-//gs;
                    if ($table2Arr[$rowCnt][0] =~ /\w+/) {
                        $textOut .= "";
                        $textOut .= EDGAR::XML::trim($table2Arr[$rowCnt][0]) .
                            "\n";
                    }

                    # exercisable date (column 1) with dashes removed
                    $table2Arr[$rowCnt][1] =~ s/-//gs;
                    if ($table2Arr[$rowCnt][1] =~ /\w+/) {
                        ($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][1]);
                        $textOut .= &EDGARdoc::util::getDateXML("exercisableDate", $month, $day, $year);
                    }

                    # expiration date (column 2)
                    if ($table2Arr[$rowCnt][2] =~ /\w+/) {
                        ($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][2]);
                        $textOut .= &EDGARdoc::util::getDateXML("expirationDate", $month, $day, $year);
                    }

                    # underlying security (column 3)
                    if ($table2Arr[$rowCnt][3] =~ /\w+/) {
                        $textOut .= "\n";

                        # amount of underlying security (column 4)
                        $table2Arr[$rowCnt][4] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][4]);
                        if ($table2Arr[$rowCnt][4] =~ /\w+/) {
                            $textOut .= " ";
                            $textOut .= $table2Arr[$rowCnt][4];
                            $textOut .= "\n";
                        }
                        $textOut .= "\n";
                    }

                    # price (column 5)
                    # NOTE(review): the converted value is tested but never
                    # appended as written -- confirm
                    $table2Arr[$rowCnt][5] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][5]);
                    if ($table2Arr[$rowCnt][5] =~ /\w+/) {
                        $textOut .= "\n";
                    }

                    # ownership form (column 6)
                    $textOut .= EDGARdoc::util::getOwnershipFormXML($table2Arr[$rowCnt][6]);

                    # nature of ownership (column 7)
                    $textOut .= EDGARdoc::util::getNatureOfOwnershipXML($table2Arr[$rowCnt][7]);
                    $textOut .= "\n";
                }
            }
        }

        #
        # footer
        #
        $textOut .= "\n";
        $textOut .= "\n";
    } else {
        # required fields aren't present, return an empty string
        $textOut = "";
        if ($debug > 0) {
            print "\n\nERROR: Required Fields are not present.\n";
        }
    }

    if ($debug > 0) {
        print "======================== TEXT OUT ===========================\n";
        print $textOut;
        print "\n======================== TEXT OUT ===========================\n";
        print "\n\n--- END PARSE TYPE 3 ---\n\n";
    }

    # the encoded text is the routine's return value
    $textOut = &EDGAR::XML::encode($textOut);
    $textOut;
}

#
# EDGAR document type 4
#
# Takes the document text, decodes and normalizes it, splits it with
# getSectionsFor3_4_5 and builds the output text from the pieces.
# Returns "" when the filer first name, last name or address is missing,
# otherwise the result of EDGAR::XML::encode on the generated text.
#
sub edgarParse4 {
    my($initialText) = shift;
    my($text, $textOut);

    # vars for main "chunks" of data
    my($header, $initialQuestions, $table1, $table2, $footer);
    my($notSubjectToSection16, $form3HoldingsReported, $form4TransactionsReported);
    my($idInfo, $addressInfo, $issuerName, $eventMonth, $eventDay, $eventYear, $amendMonth,
$amendDay, $amendYear, $director, $owner, $officer, $other, $relationAddendum, $signature, $date);
    my($requiredFields) = 1;

    # Body of the Form 4 parser: same overall flow as the Form 3 routine,
    # with different table column positions and a signature date in the
    # footer.
    # NOTE(review): many string literals below are empty or contain only
    # "\n", and the braces in the Table I / Table II section do not balance;
    # text appears to be missing from this copy -- confirm against the
    # upstream file.

    print "\n\n--- BEGIN PARSE TYPE 4 ---\n\n" if $debug > 0;

    $initialText = &EDGAR::XML::decode($initialText);
    $initialText = &EDGARdoc::util::fixText($initialText);

    ($header, $initialQuestions, $table1, $table2, $footer) = getSectionsFor3_4_5($initialText);

    if ($debug > 5) {
        print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$header\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$initialQuestions\n";
        print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$table1\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$table2\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$footer\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
    }

    #------------------------------------------------------------------------
    # HEADER PORTION
    #------------------------------------------------------------------------

    # get whether it's no longer subject to section 16
    ($notSubjectToSection16, $form3HoldingsReported, $form4TransactionsReported) = &EDGARdoc::util::getFormHeaderInfo($header);

    #------------------------------------------------------------------------
    # INITIAL QUESTIONS
    #------------------------------------------------------------------------

    # get elements from the initial questions section
    # (names not listed in this routine's my() declarations, e.g. $firstName
    # and $jointFiling, are package variables)
    ($firstName, $middleName, $lastName, $address, $city, $state, $zipCode,
     $issuerName, $issuerTicker, $idInfo, $eventMonth, $eventDay, $eventYear,
     $amendMonth, $amendDay, $amendYear, $director, $owner, $officer, $other,
     $relationAddendum, $indFiling, $jointFiling) =
         EDGARdoc::util::getFormInitialQuestionsInfo($initialQuestions);

    # abort if we are missing required field(s)
    unless ($firstName && $lastName && $address) {
        warn "EDGAR::Forms::parse4: missing required field(s)...aborting!\n";
        return "";
    }

    #------------------------------------------------------------------------
    # TABLE I PORTION
    #------------------------------------------------------------------------

    # if we still have the header, cut everything before the first "TABLE I"
    if ($table1 =~ /INITIAL STATEMENT OF.*BENEFICIAL OWNERSHIP/sig) {
        $table1 =~ s/.*?(TABLE I.*)/$1/sig;
    }

    # we don't have TABLE I, we have TABLE II: re-extract from the full text
    if ($table1 =~ /TABLE II -+ Derivative Sec\w+s Beneficially Owned/si) {
        ($table1) = $initialText =~ /.*?(TABLE I.*)TABLE II.*/sig;
    }

    # get rid of pipes and underscores in data
    $table1 =~ s/-\|-/---/g;
    $table1 =~ s/\|/ /g;
    $table1 =~ s/_/ /g;

    # remove the table header, in case table tags were put in wrong spot
    $table1 =~ s/.*TABLE (I|1) -+ Non-Derivative Securities.*?Owned.*?\n//si;

    # remove extra footnote data for Table I
    $table1 =~ s/\n\s*Reminder:\s+Report.*//si;

    # replace blank lines with --- lines
    $table1 =~ s/\n\s*\n/\n------------------------------------------\n/gs;

    if ($debug > 5) {
        print "\n\n===================== TABLE I SENT =======================\n";
        print $table1;
        print "\n===================== TABLE I SENT =======================\n\n\n";
    }

    # parseTable returns the row count, the column count and then the rows;
    # cells are addressed below as $table1Arr[row][column]
    ($table1Rows, $table1Cols, @table1Arr) = &EDGARdoc::util::parseTable($table1);

    if ($debug > 5) {
        print "rows: $table1Rows\n";
        print "columns: $table1Cols\n";
        for ($rowCnt = 0; $rowCnt < $table1Rows; $rowCnt++) {
            for ($colCnt = 0; $colCnt < $table1Cols; $colCnt++) {
                print "item R $rowCnt, C $colCnt: $table1Arr[$rowCnt][$colCnt]\n";
            }
        }
    }

    #------------------------------------------------------------------------
    # TABLE II PORTION
    #------------------------------------------------------------------------

    # remove the table header, in case table tags were put in wrong spot
    # NOTE(review): this pattern is split across a physical line here, while
    # the Form 3 and Form 5 routines have it on one line -- likely a
    # wrapping artifact; confirm
    $table2 =~ s/TABLE II - 
Derivative Securities Beneficially Owned.*?\n//i;

    # we have TABLE I with TABLE II: re-extract from the full text
    if ($table2 =~ /TABLE I -+ Non-Derivative Sec\w+s Beneficially Owned/si) {
        ($table2) = $initialText =~ /.*?(TABLE II.*)/sig;
        # NOTE(review): prepending an empty string has no visible effect as
        # written -- confirm nothing was lost from the literal
        $table2 = "" . $table2;
    }

    # get rid of pipes and underscores in data
    $table2 =~ s/-\|-/---/g;
    $table2 =~ s/\|/ /g;
    $table2 =~ s/_/ /g;

    # remove extra footnote data for Table II
    $table2 =~ s/Explanation of Responses:.*//si;

    if ($debug > 5) {
        print "\n\n===================== TABLE II SENT =======================\n";
        print $table2;
        print "\n===================== TABLE II SENT =======================\n\n\n";
    }

    ($table2Rows, $table2Cols, @table2Arr) = &EDGARdoc::util::parseTable($table2);

    if ($debug > 5) {
        print "rows: $table2Rows\n";
        print "columns: $table2Cols\n";
        for ($rowCnt = 0; $rowCnt < $table2Rows; $rowCnt++) {
            for ($colCnt = 0; $colCnt < $table2Cols; $colCnt++) {
                print "item R $rowCnt, C $colCnt: $table2Arr[$rowCnt][$colCnt]\n";
            }
        }
    }

    #------------------------------------------------------------------------
    # FOOTER PORTION
    #------------------------------------------------------------------------

    # $date feeds the signatureDate element at the end of the output
    ($signature, $date) = EDGARdoc::util::getFormFooterInfo($footer);

    if ($debug > 5) {
        print "signature: $signature\n";
        print "date: $date\n";
    }

    #------------------------------------------------------------------------
    # verify the required fields exist
    #------------------------------------------------------------------------
    # (stricter than the earlier truthiness check: each field must contain
    # at least one word character)

    # filer first name
    if (! ($firstName =~ /\w+/)) {
        $requiredFields = 0;
        warn "filer first name is required!\n";
    }

    # filer last name
    if (! ($lastName =~ /\w+/)) {
        $requiredFields = 0;
        warn "filer last name is required!\n";
    }

    # filer street
    if (!
        ($address =~ /\w+/)) {
        $requiredFields = 0;
        warn "filer street address is required!\n";
    }

    #------------------------------------------------------------------------
    # XMLize the data
    #------------------------------------------------------------------------

    # verify required data is present
    if ($requiredFields > 0) {
        $textOut .= EDGARdoc::util::getBeneficial1StartXML($amendMonth, $amendYear, $notSubjectToSection16, $form3HoldingsReported, $form4TransactionsReported);

        # HEADER
        $textOut .= "\n";

        # issuerName
        $textOut .= "\n";

        # filingType
        # NOTE(review): both branches append the same literal as written
        if ($jointFiling) {
            $textOut .= "\n";
        } else {
            $textOut .= "\n";
        }
        $textOut .= "\n";

        # filer
        if (($lastName =~ /\w+/) && ($firstName =~ /\w+/)) {
            $textOut .= &EDGARdoc::util::getNameXML("filer", $firstName, $middleName, $lastName);

            # IRS id #
            if ($idInfo =~ /\w+/) {
                $textOut .= "". EDGAR::XML::trim($idInfo) ."\n";
            }

            # relation of reporting person
            $textOut .= EDGARdoc::util::getRelationXML($director, $officer, $owner, $other, $relationAddendum);

            # address (the literal below spans a physical line break)
            $textOut .= "
\n";
            # postal
            $textOut .= &EDGARdoc::util::getPostalXML($address, $city, $state, $zipCode);
            $textOut .= "
\n";
            $textOut .= "\n";
        }

        $textOut .= &EDGARdoc::util::getDateXML("eventDate", $eventMonth, $eventDay, $eventYear);
        $textOut .= &EDGARdoc::util::getDateXML("originalDate", $amendMonth, $amendDay, $amendYear);
        $textOut .= "
\n";

        #
        # XMLize TABLE I data
        #
        # row 0 is never emitted (presumably the column headings -- confirm
        # against parseTable)
        if ($table1Rows > 1) {
            for ($rowCnt = 1; $rowCnt < $table1Rows; $rowCnt++) {
                $textOut .= "\n";

                # security title (column 0), skipping a repeated heading row
                if (($table1Arr[$rowCnt][0] =~ /\w+/) && ($table1Arr[$rowCnt][0] !~ /Title of Security/)) {
                    $textOut .= "";
                    $textOut .= EDGAR::XML::trim($table1Arr[$rowCnt][0]) . "\n";
                }

                # ownership form (column 8)
                $textOut .= EDGARdoc::util::getOwnershipFormXML($table1Arr[$rowCnt][8]);

                # nature of ownership (column 9)
                $textOut .= EDGARdoc::util::getNatureOfOwnershipXML($table1Arr[$rowCnt][9]);

                # transaction date (column 1)
                if ($table1Arr[$rowCnt][1] =~ /\w+/) {
                    ($month, $day, $year) = &EDGARdoc::util::getDateParts($table1Arr[$rowCnt][1]);
                    $textOut .= &EDGARdoc::util::getDateXML("transactionDate", $month, $day, $year);
                }

                if ($table1Arr[$rowCnt][2] =~ /\w+/) {
                    $textOut .= "\n";
                }

                # NOTE(review): converts column 3 but stores into and tests
                # column 4; the Form 5 routine uses one column throughout --
                # confirm which index is intended
                $table1Arr[$rowCnt][4] = EDGARdoc::util::toNumber($table1Arr[$rowCnt][3]);
                if ($table1Arr[$rowCnt][4] =~ /\w+/) {
                    $textOut .= "\n";
                }

                # NOTE(review): the end of the Table I row loop and the start
                # of the Table II row loop are not present in this copy; the
                # statements below read @table2Arr while still inside the
                # Table I loop as written -- confirm against upstream

                # exercisable date
                # NOTE(review): strips dashes from and parses column 7 but
                # tests column 6 -- confirm which index is intended
                $table2Arr[$rowCnt][7] =~ s/-//gs;
                if ($table2Arr[$rowCnt][6] =~ /\w+/) {
                    ($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][7]);
                    $textOut .= &EDGARdoc::util::getDateXML("exercisableDate", $month, $day, $year);
                }

                # expiration date (column 8)
                if ($table2Arr[$rowCnt][8] =~ /\w+/) {
                    ($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][8]);
                    $textOut .= &EDGARdoc::util::getDateXML("expirationDate", $month, $day, $year);
                }

                # underlying security (column 9)
                if ($table2Arr[$rowCnt][9] =~ /\w+/) {
                    $textOut .= "\n";

                    # amount of underlying security (column 10)
                    $table2Arr[$rowCnt][10] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][10]);
                    if ($table2Arr[$rowCnt][10] =~ /\w+/) {
                        $textOut .= " ";
                        $textOut .= $table2Arr[$rowCnt][10];
                        $textOut .= "\n";
                    }
                    $textOut .= "\n";
                }

                # price (column 1)
                $table2Arr[$rowCnt][1] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][1]);
                if ($table2Arr[$rowCnt][1] =~ /\w+/) {
                    $textOut .= "\n";
                }

                # ownership form (column 13)
                $textOut .= EDGARdoc::util::getOwnershipFormXML($table2Arr[$rowCnt][13]);

                # nature of ownership (column 14)
                $textOut .=
                    EDGARdoc::util::getNatureOfOwnershipXML($table2Arr[$rowCnt][14]);

                # NOTE(review): converts column 12 but stores the result in
                # column 11, then tests and emits the unconverted column 12
                # -- confirm which index is intended
                $table2Arr[$rowCnt][11] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][12]);
                if ($table2Arr[$rowCnt][12] =~ /\w+/) {
                    $textOut .= "";
                    $textOut .= $table2Arr[$rowCnt][12];
                    $textOut .= "\n";
                }

                # transaction date (column 2)
                if ($table2Arr[$rowCnt][2] =~ /\w+/) {
                    ($month, $day, $year) = EDGARdoc::util::getDateParts($table2Arr[$rowCnt][2]);
                    $textOut .= &EDGARdoc::util::getDateXML("transactionDate", $month, $day, $year);
                }

                if ($table2Arr[$rowCnt][3] =~ /\w+/) {
                    $textOut .= "\n";
                }

                # NOTE(review): converts column 5 but stores the result in
                # column 4, then tests column 5 -- confirm which index is
                # intended
                $table2Arr[$rowCnt][4] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][5]);
                if ($table2Arr[$rowCnt][5] =~ /\w+/) {
                    $textOut .= "\n";
                }

                # NOTE(review): the closing braces of the row loop(s) are not
                # present in this copy before the footer below

                # footer: emit the signature date when month and year parsed
                ($month, $day, $year) = EDGARdoc::util::getDateParts($date);
                if (($month =~ /\w+/) && ($year =~ /\w+/)) {
                    $textOut .= EDGARdoc::util::getDateXML("signatureDate", $month, $day, $year);
                }
                $textOut .= "\n";
                $textOut .= "\n";
    } else {
        # required fields aren't present, return an empty string
        $textOut = "";
        if ($debug > 0) {
            print "\n\nERROR: Required Fields are not present.\n";
        }
    }

    if ($debug > 0) {
        print "======================== TEXT OUT ===========================\n";
        print $textOut;
        print "\n======================== TEXT OUT ===========================\n";
        print "\n\n--- END PARSE TYPE 4 ---\n\n";
    }

    # the encoded text is the routine's return value
    $textOut = &EDGAR::XML::encode($textOut);
    $textOut;
}

#
# EDGAR document type 5
#
# Takes the document text, decodes and normalizes it, splits it with
# getSectionsFor3_4_5 and builds the output text from the pieces.
# Returns "" when the filer first name, last name or address is missing,
# otherwise the result of EDGAR::XML::encode on the generated text.
#
sub edgarParse5 {
    my($initialText) = shift;
    my($text, $textOut);

    # vars for main "chunks" of data
    my($header, $initialQuestions, $table1, $table2, $footer);
    my($notSubjectToSection16, $form3HoldingsReported, $form4TransactionsReported);
    my($idInfo, $addressInfo, $issuerName, $eventMonth, $eventDay, $eventYear, $amendMonth, $amendDay, $amendYear, $director, $owner, $officer, $other, $relationAddendum, $signature, $date);
    my($requiredFields) = 1;

    print "\n\n--- BEGIN PARSE TYPE 5 ---\n\n" if $debug > 0;

    $initialText = &EDGAR::XML::decode($initialText);
    $initialText = &EDGARdoc::util::fixText($initialText);

    ($header, $initialQuestions, $table1, $table2,
$footer) = getSectionsFor3_4_5($initialText);

    # Body of the Form 5 parser: same overall flow as the Form 4 routine,
    # with the table columns at different positions.
    # NOTE(review): many string literals below are empty or contain only
    # "\n", and the braces in the Table I / Table II section do not balance;
    # text appears to be missing from this copy -- confirm against the
    # upstream file.

    if ($debug > 5) {
        print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$header\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ HEADER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$initialQuestions\n";
        print "~~~~~~~~~~~~~~~~~~ INITIAL QUESTIONS ~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$table1\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE I ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$table2\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ TABLE II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
        print "$footer\n";
        print "~~~~~~~~~~~~~~~~~~~~~~ FOOTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n";
    }

    #------------------------------------------------------------------------
    # HEADER PORTION
    #------------------------------------------------------------------------

    # get whether it's no longer subject to section 16
    ($notSubjectToSection16, $form3HoldingsReported, $form4TransactionsReported) = &EDGARdoc::util::getFormHeaderInfo($header);

    #------------------------------------------------------------------------
    # INITIAL QUESTIONS
    #------------------------------------------------------------------------

    # get elements from the initial questions section
    # (names not listed in this routine's my() declarations, e.g. $firstName
    # and $jointFiling, are package variables)
    ($firstName, $middleName, $lastName, $address, $city, $state, $zipCode, $issuerName, $issuerTicker, $idInfo, $eventMonth, $eventDay, $eventYear, $amendMonth, $amendDay, $amendYear, $director, $owner, $officer, $other, $relationAddendum, $indFiling, $jointFiling) = EDGARdoc::util::getFormInitialQuestionsInfo($initialQuestions);

    # abort if we are missing required field(s)
    unless ($firstName && $lastName && $address) {
        warn "EDGAR::Forms::parse5: missing required field(s)...aborting!\n";
        return "";
    }

    #------------------------------------------------------------------------
    # TABLE I PORTION
    #------------------------------------------------------------------------

    # if we still have the header, cut everything before the first "TABLE I"
    if ($table1 =~ /INITIAL STATEMENT OF.*BENEFICIAL OWNERSHIP/sig) {
        $table1 =~ s/.*?(TABLE I.*)/$1/sig;
    }

    # we don't have TABLE I, we have TABLE II: re-extract from the full text
    if ($table1 =~ /TABLE II -+ Derivative Sec\w+s Beneficially Owned/si) {
        ($table1) = $initialText =~ /.*?(TABLE I.*)TABLE II.*/sig;
    }

    # get rid of pipes and underscores in data
    $table1 =~ s/-\|-/---/g;
    $table1 =~ s/\|/ /g;
    $table1 =~ s/_/ /g;

    # remove the table header, in case table tags were put in wrong spot
    $table1 =~ s/.*TABLE (I|1) -+ Non-Derivative Securities.*?Owned.*?\n//si;

    # remove extra footnote data for Table I
    $table1 =~ s/\n\s*Reminder:\s+Report.*//si;

    # replace blank lines with --- lines
    $table1 =~ s/\n\s*\n/\n------------------------------------------\n/gs;

    if ($debug > 5) {
        print "\n\n===================== TABLE I SENT =======================\n";
        print $table1;
        print "\n===================== TABLE I SENT =======================\n\n\n";
    }

    # parseTable returns the row count, the column count and then the rows;
    # cells are addressed below as $table1Arr[row][column]
    ($table1Rows, $table1Cols, @table1Arr) = &EDGARdoc::util::parseTable($table1);

    if ($debug > 5) {
        print "rows: $table1Rows\n";
        print "columns: $table1Cols\n";
        for ($rowCnt = 0; $rowCnt < $table1Rows; $rowCnt++) {
            for ($colCnt = 0; $colCnt < $table1Cols; $colCnt++) {
                print "item R $rowCnt, C $colCnt: $table1Arr[$rowCnt][$colCnt]\n";
            }
        }
    }

    #------------------------------------------------------------------------
    # TABLE II PORTION
    #------------------------------------------------------------------------

    # remove the table header, in case table tags were put in wrong spot
    $table2 =~ s/TABLE II - Derivative Securities Beneficially Owned.*?\n//i;

    # we have TABLE I with TABLE II: re-extract from the full text
    if ($table2 =~ /TABLE I -+ Non-Derivative Sec\w+s Beneficially Owned/si) {
        ($table2) = $initialText =~ /.*?(TABLE II.*)/sig;
        # NOTE(review): prepending an empty string has no visible effect as
        # written -- confirm nothing was lost from the literal
        $table2 = "" .
            $table2;
    }

    # get rid of pipes and underscores in data
    $table2 =~ s/-\|-/---/g;
    $table2 =~ s/\|/ /g;
    $table2 =~ s/_/ /g;

    # remove extra footnote data for Table II
    $table2 =~ s/Explanation of Responses:.*//si;

    if ($debug > 5) {
        print "\n\n===================== TABLE II SENT =======================\n";
        print $table2;
        print "\n===================== TABLE II SENT =======================\n\n\n";
    }

    ($table2Rows, $table2Cols, @table2Arr) = &EDGARdoc::util::parseTable($table2);

    if ($debug > 5) {
        print "rows: $table2Rows\n";
        print "columns: $table2Cols\n";
        for ($rowCnt = 0; $rowCnt < $table2Rows; $rowCnt++) {
            for ($colCnt = 0; $colCnt < $table2Cols; $colCnt++) {
                print "item R $rowCnt, C $colCnt: $table2Arr[$rowCnt][$colCnt]\n";
            }
        }
    }

    #------------------------------------------------------------------------
    # FOOTER PORTION
    #------------------------------------------------------------------------

    # $date feeds the signatureDate element at the end of the output
    ($signature, $date) = EDGARdoc::util::getFormFooterInfo($footer);

    if ($debug > 5) {
        print "signature: $signature\n";
        print "date: $date\n";
    }

    #------------------------------------------------------------------------
    # verify the required fields exist
    #------------------------------------------------------------------------
    # (stricter than the earlier truthiness check: each field must contain
    # at least one word character)

    # filer first name
    if (! ($firstName =~ /\w+/)) {
        $requiredFields = 0;
        warn "filer first name is required!\n";
    }

    # filer last name
    if (! ($lastName =~ /\w+/)) {
        $requiredFields = 0;
        warn "filer last name is required!\n";
    }

    # filer street
    if (!
        ($address =~ /\w+/)) {
        $requiredFields = 0;
        warn "filer street address is required!\n";
    }

    #------------------------------------------------------------------------
    # XMLize the data
    #------------------------------------------------------------------------

    # verify required data is present
    if ($requiredFields > 0) {
        $textOut .= EDGARdoc::util::getBeneficial1StartXML($amendMonth, $amendYear, $notSubjectToSection16, $form3HoldingsReported, $form4TransactionsReported);

        # HEADER
        $textOut .= "\n";

        # issuerName
        $textOut .= "\n";

        # filingType
        # NOTE(review): both branches append the same literal as written
        if ($jointFiling) {
            $textOut .= "\n";
        } else {
            $textOut .= "\n";
        }
        $textOut .= "\n";

        # filer
        if (($lastName =~ /\w+/) && ($firstName =~ /\w+/)) {
            $textOut .= &EDGARdoc::util::getNameXML("filer", $firstName, $middleName, $lastName);

            # IRS id #
            if ($idInfo =~ /\w+/) {
                $textOut .= "". EDGAR::XML::trim($idInfo) ."\n";
            }

            # relation of reporting person
            $textOut .= EDGARdoc::util::getRelationXML($director, $officer, $owner, $other, $relationAddendum);

            # address (the literal below spans a physical line break)
            $textOut .= "
\n";
            # postal
            $textOut .= &EDGARdoc::util::getPostalXML($address, $city, $state, $zipCode);
            $textOut .= "
\n";
            $textOut .= "\n";
        }

        $textOut .= &EDGARdoc::util::getDateXML("eventDate", $eventMonth, $eventDay, $eventYear);
        $textOut .= &EDGARdoc::util::getDateXML("originalDate", $amendMonth, $amendDay, $amendYear);
        $textOut .= "
\n";

        #
        # XMLize TABLE I data
        #
        # NOTE(review): the guard is "> 1" but the loop starts at row 2,
        # whereas the Form 3 and Form 4 routines start at row 1 -- confirm
        # whether two heading rows are expected here
        if ($table1Rows > 1) {
            for ($rowCnt = 2; $rowCnt < $table1Rows; $rowCnt++) {
                $textOut .= "\n";

                # security title (column 0), skipping a repeated heading row
                if (($table1Arr[$rowCnt][0] =~ /\w+/) && ($table1Arr[$rowCnt][0] !~ /Title of Security/)) {
                    $textOut .= "";
                    $textOut .= EDGAR::XML::trim($table1Arr[$rowCnt][0]) . "\n";
                }

                # ownership form (column 7)
                $textOut .= EDGARdoc::util::getOwnershipFormXML($table1Arr[$rowCnt][7]);

                # nature of ownership (column 8)
                $textOut .= EDGARdoc::util::getNatureOfOwnershipXML($table1Arr[$rowCnt][8]);

                # transaction date (column 1)
                if ($table1Arr[$rowCnt][1] =~ /\w+/) {
                    ($month, $day, $year) = &EDGARdoc::util::getDateParts($table1Arr[$rowCnt][1]);
                    $textOut .= &EDGARdoc::util::getDateXML("transactionDate", $month, $day, $year);
                }

                if ($table1Arr[$rowCnt][2] =~ /\w+/) {
                    $textOut .= "\n";
                }

                # column 3, normalized in place by toNumber
                $table1Arr[$rowCnt][3] = EDGARdoc::util::toNumber($table1Arr[$rowCnt][3]);
                if ($table1Arr[$rowCnt][3] =~ /\w+/) {
                    $textOut .= "\n";
                }

                # NOTE(review): the end of the Table I row loop and the start
                # of the Table II row loop are not present in this copy; the
                # statements below read @table2Arr while still inside the
                # Table I loop as written -- confirm against upstream

                # exercisable date (column 6) with dashes removed
                $table2Arr[$rowCnt][6] =~ s/-//gs;
                if ($table2Arr[$rowCnt][6] =~ /\w+/) {
                    ($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][6]);
                    $textOut .= &EDGARdoc::util::getDateXML("exercisableDate", $month, $day, $year);
                }

                # expiration date (column 7)
                if ($table2Arr[$rowCnt][7] =~ /\w+/) {
                    ($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][7]);
                    $textOut .= &EDGARdoc::util::getDateXML("expirationDate", $month, $day, $year);
                }

                # underlying security (column 8)
                if ($table2Arr[$rowCnt][8] =~ /\w+/) {
                    $textOut .= "\n";

                    # amount of underlying security (column 9)
                    $table2Arr[$rowCnt][9] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][9]);
                    if ($table2Arr[$rowCnt][9] =~ /\w+/) {
                        $textOut .= " ";
                        $textOut .= $table2Arr[$rowCnt][9];
                        $textOut .= "\n";
                    }
                    $textOut .= "\n";
                }

                # price (column 1)
                $table2Arr[$rowCnt][1] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][1]);
                if ($table2Arr[$rowCnt][1] =~ /\w+/) {
                    $textOut .= "\n";
                }

                # ownership form (column 12)
                $textOut .= EDGARdoc::util::getOwnershipFormXML($table2Arr[$rowCnt][12]);

                # nature of ownership (column 13)
                $textOut .=
                    EDGARdoc::util::getNatureOfOwnershipXML($table2Arr[$rowCnt][13]);

                # column 11, normalized in place by toNumber and emitted
                $table2Arr[$rowCnt][11] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][11]);
                if ($table2Arr[$rowCnt][11] =~ /\w+/) {
                    $textOut .= "";
                    $textOut .= $table2Arr[$rowCnt][11];
                    $textOut .= "\n";
                }

                # transaction date (column 2)
                if ($table2Arr[$rowCnt][2] =~ /\w+/) {
                    ($month, $day, $year) = &EDGARdoc::util::getDateParts($table2Arr[$rowCnt][2]);
                    $textOut .= &EDGARdoc::util::getDateXML("transactionDate", $month, $day, $year);
                }

                if ($table2Arr[$rowCnt][3] =~ /\w+/) {
                    $textOut .= "\n";
                }

                # column 4, normalized in place by toNumber
                $table2Arr[$rowCnt][4] = EDGARdoc::util::toNumber($table2Arr[$rowCnt][4]);
                if ($table2Arr[$rowCnt][4] =~ /\w+/) {
                    $textOut .= "\n";
                }

                # NOTE(review): the closing braces of the row loop(s) are not
                # present in this copy before the footer below

                # footer: emit the signature date when month and year parsed
                ($month, $day, $year) = &EDGARdoc::util::getDateParts($date);
                if (($month =~ /\w+/) && ($year =~ /\w+/)) {
                    $textOut .= &EDGARdoc::util::getDateXML("signatureDate", $month, $day, $year);
                }
                $textOut .= "\n";
                $textOut .= "\n";
    } else {
        # required fields aren't present, return an empty string
        $textOut = "";
        if ($debug > 0) {
            print "\n\nERROR: Required Fields are not present.\n";
        }
    }

    if ($debug > 0) {
        print "======================== TEXT OUT ===========================\n";
        print $textOut;
        print "\n======================== TEXT OUT ===========================\n";
        print "\n\n--- END PARSE TYPE 5 ---\n\n";
    }

    # the encoded text is the routine's return value
    $textOut = &EDGAR::XML::encode($textOut);
    $textOut;
}

# parse document of type 144
sub edgarParse144 {
    local($text) = @_;

    $text = &EDGAR::XML::decode($text);
    $text = &EDGARdoc::util::fixText($text);

    # Check the format of the header
    if($text =~ /1\(a\)\s*NAME OF ISSUER.*\(c\) S.E.C. FILE NO./i) {
        $doctype = 1;
    } else {
        $doctype = 2;
    }

    # Code to handle document of type 1
    if ($doctype == 1) {
        # Start extracting required data
        # Processing HEADER
        # ($table) = ($text =~ /(.*?<\/TABLE>)/s);
        # ($table) = ($table =~ /(
.*)1\(d\).*/s); # $reqdata = EDGARdoc::util::parseTable($table); ($line1) = ($text =~ /1\(a\) NAME OF ISSUER.*S.E.C. FILE NO.\n(.*?-{3,})/si); $line1 =~ s/-{3,}//; $line1 =~ s/\n//gsi; ($line1) = ($line1 =~ /\s*(.*)/); ($issuerName, $irsId, $fileNo) = split(/\s{3,}/, $line1); ($line2) = ($text =~ /1\(d\) ADDRESS OF ISSUER.*TELEPHONE NO.\n(.*)2\(a\)/si); $line2 =~ s/-{3,}//gs; $line2 =~ s/AREA//gsi; $line2 =~ s/NUMBER//gsi; $line2 =~ s/CODE//gsi; $line2 =~ s/\n//gs; ($line2) = ($line2 =~ /\s*(.*)/); ($street, $city, $state, $zip, $area, $number) = split(/\s{3,}/, $line2); ($line3) = ($text =~ /2\(a\) NAME OF PERSON FOR WHOSE ACCOUNT.*IDENT. NO.(.*?-{3,})/si); $line3 =~ s/-{3,}//gs; $line3 =~ s/\n//gs; ($line3) = ($line3 =~ /\s*(.*)/); ($sellerName, $sellerirsid, $relationship, $sellerAddress) = split(/\s{3,}/, $line3); if ($debug > 1) { print "-----------------------------------------------------------\n"; print " HEADER \n"; print "-----------------------------------------------------------\n"; print "Issuer Name = $issuerName\n"; print "IRS ID = $irsId\n"; print "SEC File Number = $fileNo\n"; print "Street = $street\n"; print "City = $city\n"; print "State = $state\n"; print "Seller Name = $sellerName\n"; print "Relationship = $relationship\n"; print "Seller Address = $sellerAddress\n"; } } # Code to parse the various tables in the form # Parsing the first table... ($table1) = ($text =~ /2.*\s*\(a\).*(
.*3\s*\(a\).*?<\/TABLE>)/si); $table1 =~ s/\n*
\n*//; $table1 =~ s/\n*<\/TABLE>\n*//; ($maxrow, $maxcol, @data) = EDGARdoc::util::parseTable($table1); # Extract the required data from the 2-dim array # Extracting security title $securityTitle = EDGARdoc::util::getTableData('(a)', $maxrow, $maxcol, @data); # Extracting NAme of each Broker $brokerName = EDGARdoc::util::getTableData('Each', $maxrow, $maxcol, @data); # Extracting Number of Shares or other Units to be sold $numberShares = EDGARdoc::util::getTableData('Number of', $maxrow, $maxcol, @data); # Extracting Aggregate Market Value $marketValue = EDGARdoc::util::getTableData('grega', $maxrow, $maxcol, @data); # Extracting Number of shares or other units outstanding $numberOutstanding = EDGARdoc::util::getTableData('Outstanding', $maxrow, $maxcol, @data); # Extracting Approximate Date of Sale $saleDate = EDGARdoc::util::getTableData('Approximate', $maxrow, $maxcol, @data); # Extracting Name of each Securities Exchange $securitiesExchange = EDGARdoc::util::getTableData('Exchange', $maxrow, $maxcol, @data); # Parsing TABLE - I ($tableI) = ($text =~ /(TABLE I.*?
.*)TABLE II/si); ($tableI) = ($tableI =~ /.*
(.*)\n/si); ($maxrow, $maxcol, @data) = EDGARdoc::util::parseTable($tableI); # Extracting data from TABLE - I # Extracting Date You acquired $acquiredDate = EDGARdoc::util::getTableData('Date You', $maxrow, $maxcol, @data); # Extracting NAture Of Acquisition Transaction $natureOfAcquisition = EDGARdoc::util::getTableData('Transaction', $maxrow, $maxcol, @data); # Extracting Name of Person from whom acquired $acquiredFrom = EDGARdoc::util::getTableData('Person from', $maxrow, $maxcol, @data); # Extracting Amount of Securities ACquired $amount = EDGARdoc::util::getTableData('Securitie', $maxrow, $maxcol, @data); # Extracting Date of PAyment $paymentDate = EDGARdoc::util::getTableData('Date Of', $maxrow, $maxcol, @data); # Extracting Nature of payment $paymentNature = EDGARdoc::util::getTableData('Nature of Payment', $maxrow, $maxcol, @data); # PArsing Table - II ($tableII) = ($text =~ /TABLE II\s*-*\s*SECURITIES.*sold.\n(.*)\nREMARKS/si); ($maxrow, $maxcol, @data) = EDGARdoc::util::parseTable($tableII); # Extracting Data from Table - II # Extracting name and Address of seller $sellerName = EDGARdoc::util::getTableData('Name and Address', $maxrow, $maxcol, @data); # Extracting Title of securities $securitiesSold = EDGARdoc::util::getTableData('Securities', $maxrow, $maxcol, @data); # Extracting Date of Sale $saleDate1 = EDGARdoc::util::getTableData('Date of Sale', $maxrow, $maxcol, @data); # Extracting Amount of securities sold $amount1 = EDGARdoc::util::getTableData('Amo', $maxrow, $maxcol, @data); # Extracting Gross Proceeds $gross = EDGARdoc::util::getTableData('Gross', $maxrow, $maxcol, @data); # Parsing the footer ($noticeDate) = ($text=~ /REMARKS:.*?\s*?\n(.*)DATE OF NOTICE/si); $noticeDate =~ s/(.*)\n-{3,}.*/$1/gsi; $noticeDate =~ s/.*\n(.*)/$1/gsi; $noticeDate = EDGAR::XML::trim($noticeDate); ($signature) = ($text=~ /DATE OF NOTICE.*?\s*?\n(.*)SIGNATURE/si); $signature =~ s/(.*)\n-{3,}.*/$1/gsi; $signature =~ s/.*\n(.*)/$1/gsi; $signature = 
EDGAR::XML::trim($signature); if($signature =~ /$noticeDate/) { $signature =~ s/$noticeDate//; } if ($debug > 1) { print "--------------------Section 3------------------------------\n"; print "Security Title = $securityTitle\n"; print "Broker Name = $brokerName\n"; print "Number of shares = $numberShares\n"; print "Market Value = $marketValue\n"; print "Number Outstanding = $numberOutstanding\n"; print "Sale Date = $saleDate\n"; print "Securities Exchange = $securitiesExchange\n\n\n\n"; print "-------------------Table - I-------------------------------\n"; print "Acquired Date = $acquiredDate\n"; print "Nature of acquisition = $natureOfAcquisition\n"; print "Acquired From = $acquiredFrom\n"; print "Amount = $amount\n"; print "Payment Date = $paymentDate\n"; print "PAyment NAture = $paymentNature\n\n\n\n"; print "--------------------Table - II-----------------------------\n"; print "Seller NAme = $sellerName\n"; print "Title = $securitiesSold\n"; print "Sale Date = $saleDate1\n"; print "Amount = $amount1\n"; print "Gross = $gross\n\n\n\n"; print "---------------------Footer--------------------------------\n"; print "Notice Date = $noticeDate\n"; print "Signature = $signature\n"; } # Code to handle document of type 2 if ($doctype == 2) { # Start extracting required data # Processing HEADER if ($debug > 1) { print "-------------------HEADER--------------------------\n"; } # Processing issuername ($issuerName) = ($text =~ /NAME OF ISSUER \(Please type or print\)(.*)/s); $newtext = $issuerName; ($issuerName) = ($issuerName =~ /.*\n*(.*)\n*.*/); $issuerName =~ s/\s*//g; # Processing IRS Identification Number ($irsId) = ($newtext =~ /IRS IDENT. NO.(.*)/s); $newtext = $irsId; ($irsId) = ($irsId =~ /.*\n*(.*)\n*.*/); $irsId =~ s/\s*//g; # Processing SEC File Number ($fileNo) = ($newtext =~ /S.E.C. 
FILE NO.(.*)/s); $newtext = $fileNo; ($fileNo) = ($fileNo =~ /.*\n*(.*)\n*.*/); $fileNo =~ s/\s*//g; # Processing Issuer Address ($issuerAddress) = ($newtext =~ /ADDRESS OF ISSUER(.*)/s); $newtext = $issuerAddress; ($issuerAddress) = ($issuerAddress =~ /.*\n*(.*)\n*.*/); ($issuerAddress) = ($issuerAddress =~ /\s*(.*)/); # Processing Telephone Number ($telephone) = ($newtext =~ /TELEPHONE NO.(.*)/s); $newtext = $telephone; ($telephone) = ($telephone =~ /.*\n*(.*)\n*.*/); # Processing Seller Name ($sellername) = ($newtext =~ /NAME OF PERSON FOR WHOSE ACCOUNT THE SECURITIES ARE TO BE SOLD(.*)/s); $newtext = $sellername; ($sellername) = ($sellername =~ /.*\n*\s*(.*)\n*.*/); ($sellerfname, $sellermname, $sellerlname) = ($sellername =~ /(\w*)\s*(\w*)\s*(\w*)/); if(length($sellerlname) == 0) { $sellerlname = $sellermname; $sellermname = ""; } # Processing Seller IRS ID ($sellerirsid) = ($newtext =~ /IRS IDENT. NO.(.*)/si); $newtext = $sellerirsid; ($sellerirsid) = ($sellerirsid =~ /.*\n*\s*(.*)\n*.*/); $sellerirsid =~ s/-{3,}//gsi; $sellerirsid =~ s/\n//gsi; # Processing Relationship to Issuer ($relationship) = ($newtext =~ /RELATIONSHIP TO ISSUER(.*)/s); $newtext = $relationship; ($relationship) = ($relationship =~ /.*\n*\s*(.*)\n*.*/); # Processing Seller Address ($sellerAddress) = ($newtext =~ /ADDRESS(.*)/si); $newtext = $sellerAddress; ($sellerAddress) = ($sellerAddress =~ /.*\n\s*(.*)\n*.*/); $sellerAddress =~ s/-{3,}//gsi; $sellerAddress =~ s/\n//gsi; if ($debug > 1) { print "Issuer Name = $issuerName\n"; print "IRS ID = $irsId\n"; print "File Number = $fileNo\n"; print "Issuer Address = $issuerAddress\n"; print "Telephone Number = $telephone\n"; print "Seller First NAme = $sellerfname\n"; print "Seller Middle NAme = $sellermname\n"; print "Seller Last NAme = $sellerlname\n"; print "Seller Full NAme = $sellername\n"; print "Seller IRS ID = $sellerirsid\n"; print "Relationship = $relationship\n"; print "Seller Address = $sellerAddress\n"; } } # XMLize the data 
# extracted
    $XMLout = '';
    $XMLout .= "\n";
    $XMLout .= "\n";

    # Entity Issuer
    # Collapse every run of whitespace in the issuer name to a single blank.
    # The replacement has to be a literal space: \s only has a meaning on the
    # pattern side of s///, and as replacement text it produced the letter "s"
    # ("Acme  Corp" became "AcmesCorp").
    $issuerName =~ s/\s+/ /g;
    $XMLout .= "\n";
    if ($irsId =~ /\w+/) {
        $XMLout .= "" . EDGAR::XML::trim($irsId) . "\n";
    }
    $XMLout .= "" . EDGAR::XML::trim($fileNo) . "\n";

    # Issuer address: fields are separated by runs of two or more whitespace
    # characters.
    # NOTE(review): only the doctype 2 path assigns $issuerAddress; the
    # doctype 1 path fills $street/$city/$state/$zip directly and this split
    # overwrites them -- confirm that is intended.
    ($street, $city, $state, $zip) = split(/\s{2,}/, $issuerAddress);
    if ($street =~ /\w+/) {
        $XMLout .= "
\n"; $XMLout .= EDGARdoc::util::getPostalXML($street, $city, $state, $zip); ($telephone) = ($telephone =~ /\s*(.*)/); ($area, $number) = split(/\s{2,}/, $telephone); $XMLout .= EDGARdoc::util::getPhoneXML("phone", $area, $number); $XMLout .= "
\n"; } $XMLout .= "
\n";

    # Entity Seller
    # emitted only when both a first and a last name were parsed
    if (($sellerfname =~ /\w+/) && ($sellerlname =~ /\w+/)) {
        $XMLout .= EDGARdoc::util::getNameXML('seller', $sellerfname, $sellermname, $sellerlname);
        if ($sellerirsid =~ /\w+/) {
            $XMLout .= "" . EDGAR::XML::trim($sellerirsid) . "\n";
        }
        if ($relationship =~ /\w+/) {
            $XMLout .= "\n";
        }
        # drop leading whitespace, then split the address on runs of three or
        # more whitespace characters
        ($sellerAddress) = ($sellerAddress =~ /\s*(.*)/);
        ($street, $city, $state, $zip) = split(/\s{3,}/, $sellerAddress);
        if ($street =~ /\w+/) {
            $XMLout .= "
";
            $XMLout .= EDGARdoc::util::getPostalXML($street, $city, $state, $zip);
            $XMLout .= "
";
        }
        $XMLout .= "\n";
    }

    # Element security title
    ($securityTitle) = ($securityTitle =~ /\s*(.*)/);
    $XMLout .= "" . EDGAR::XML::trim($securityTitle) . "\n";

    # Entity broker
    # The broker name is extracted from the section 3 table into $brokerName.
    # This section used to read $brokername (lower-case n), a different and
    # never-assigned variable, so no broker was ever emitted.
    ($brokerName) = ($brokerName =~ /\s*(.*)/);
    # if there's a comma in the name, switch the first and last name
    if ($brokerName =~ /\.$/) {
        # name ends with a period: treated as "Last, First Middle."
        $brokerName =~ s/,//;
        ($lastName, $firstName, $middleName) = $brokerName =~ /(\w+)\s+(\w+)\s+(\w*)/;
    } elsif ($brokerName =~ /,/) {
        # NOTE(review): the alternation and the literal blank inside this
        # pattern look unintended; left exactly as found.
        ($lastName, $firstName, $middleName) = $brokerName =~ /(\w+)\,\s+(\w+)|((\w+)\s+ (\w*))/;
    } else {
        # plain "First [Middle] Last"
        $brokerName =~ s/,|\.//;
        ($firstName, $middleName, $lastName) = $brokerName =~ /(\w+)\s+(\w*?)\s*(\w+)$/;
        if ($debug > 5) {
            print "NAME INFO:::$brokerName\n";
            print "FIRST:::$firstName\n";
            print "LAST:::$lastName\n";
            print "MI:::$middleName\n\n";
        }
    }
    # every branch above assigns all three name parts (undef when its pattern
    # does not match), so a broker is emitted only for a name that parsed
    if (($firstName =~ /\w+/) && ($lastName =~ /\w+/)) {
        $XMLout .= EDGARdoc::util::getNameXML('broker', $firstName, $middleName, $lastName);
        $XMLout .= "\n";
    }

    # Element Number of Shares
    $numberShares = EDGARdoc::util::toNumber($numberShares);
    $XMLout .= "$numberShares\n";

    # Element Market Value
    # the table cell reads "<amount> as of <date>"
    ($marketValue) = ($marketValue =~ /\s*(.*)/);
    ($dollarValue, $date) = split(/as of/, $marketValue);
    ($month, $day, $year) = EDGARdoc::util::getDateParts($date);
    $dollarValue = EDGARdoc::util::toNumber($dollarValue);
    $XMLout .= "\n";
    $XMLout .= EDGARdoc::util::getDateXML("marketDate", $month, $day, $year);
    $XMLout .= "\n";

    # Element Number Outstanding
    ($numberOutstanding) = ($numberOutstanding =~ /\s*(.*)/);
    $numberOutstanding = EDGARdoc::util::toNumber($numberOutstanding);
    $XMLout .= "$numberOutstanding\n";

    # Element Sale Date
    ($saleDate) = ($saleDate =~ /\s*(.*)/);
    ($month, $day, $year) = EDGARdoc::util::getDateParts($saleDate);
    $XMLout .= EDGARdoc::util::getDateXML("saleDate", $month, $day, $year);

    # Element Securities Exchange
    ($securitiesExchange) = ($securitiesExchange =~ /\s*(.*)/);
    $XMLout .= "" . EDGAR::XML::trim($securitiesExchange);
    $XMLout .= "\n";
    $XMLout .= "
\n\n"; # Element Securities to be sold $XMLout .= "\n"; # Element Security Title $XMLout .= "" . EDGAR::XML::trim($securityTitle) . "\n"; # Element Acquired Date ($acquiredDate) = ($acquiredDate =~ /\s*(.*)/); ($month, $day, $year) = EDGARdoc::util::getDateParts($acquiredDate); $XMLout .= EDGARdoc::util::getDateXML("acquiredDate", $month, $day, $year); # Element Nature of Acquisition ($natureOfAcquisition) = ($natureOfAcquisition =~ /\s*(.*)/); $XMLout .= "" . EDGAR::XML::trim($natureOfAcquisition); $XMLout .= "\n"; # Element Acquired From ($acquiredFrom) = ($acquiredFrom =~ /\s*(.*)/); $XMLout .= "" . EDGAR::XML::trim($acquiredFrom) . "\n"; # Element amount $amount = EDGARdoc::util::toNumber($amount); $XMLout .= "$amount\n"; # Element payment Date ($paymentDate) = ($paymentDate =~ /\s*(.*)/); ($month, $day, $year) = EDGARdoc::util::getDateParts($paymentDate); $XMLout .= EDGARdoc::util::getDateXML("paymentDate", $month, $day, $year); # Element payment Nature ($paymentNature) = ($paymentNature =~ /\s*(.*)/); $XMLout .= "" . EDGAR::XML::trim($paymentNature) . "\n"; $XMLout .= "\n"; $XMLout .= "\n"; # Securities Sold $XMLout .= "\n"; # Element sellername $XMLout .= EDGARdoc::util::getNameXML('seller', $sellerfname, $sellermname, $sellerlname); ($sellerAddress) = ($sellerAddress =~ /\s*(.*)/); ($street, $city, $state, $zip) = split(/\s{3,}/, $sellerAddress); if ($street =~ /\w+/) { $XMLout .= "
\n"; $XMLout .= EDGARdoc::util::getPostalXML($street, $city, $state, $zip); $XMLout .= "
\n"; } $XMLout .= "\n"; $XMLout .= "" . EDGAR::XML::trim($securitiesSold) . "\n"; ($saleDate1) = ($saleDate1 =~ /\s*(.*)/g); ($amount1) = ($amount1 =~ /\s*(.*)/g); ($gross) = ($gross =~ /\s*(.*)/g); $gross =~ s/\$/ /g; $gross =~ s/\s+/ /g; ($saleDate1) = ($saleDate1 =~ /\s(.*)/); ($gross) = ($gross =~ /\s(.*)/); if ($debug > 1) { print $saleDate1, "\n"; print $amount1, "\n"; print $gross, "\n"; } @saleDates = split(/ /, $saleDate1); @amts = split(/ /, $amount1); @grosses = split(/ /, $gross); $datacount = 0; foreach $date(@saleDates) { ($month, $day, $year) = EDGARdoc::util::getDateParts($saleDates[$datacount]); $XMLout .= EDGARdoc::util::getDateXML("saleDate", $month, $day, $year); $amts[$datacount] = EDGARdoc::util::toNumber($amts[$datacount]); $XMLout .= "$amts[$datacount]\n"; $grosses[$datacount] = EDGARdoc::util::toNumber($grosses[$datacount]); $XMLout .= "$grosses[$datacount]\n"; $datacount++; } $XMLout .= "
\n\n"; # Footer $XMLout .= "
\n"; # Element Signature ($signature) = ($signature =~ /\s*\/s\/(.*)/); if ($signature =~ /\w+/) { $XMLout .= "" . EDGAR::XML::trim($signature) . "\n"; } # Element Date Notice ($month, $day, $year) = EDGARdoc::util::getDateParts($noticeDate); if (($month =~ /\w+/) && ($year =~ /\w+/)) { $XMLout .= EDGARdoc::util::getDateXML("signatureDate", $month, $day, $year); } $XMLout .= "
\n"; $XMLout .= "
\n";

    if ($debug > 1) {
        print "\n\n\n\n------------XML OUTPUT------------------\n\n\n";
        print $XMLout;
    }

    # the function's value is the encoded form of the assembled output
    $XMLout = &EDGAR::XML::encode($XMLout);
    $XMLout;
}

#
# Get Address parts
#
# Split a free-form postal address into its components.
#
# argument: $address - address text; runs of three or more underscores are
#                      removed before parsing
# returns:  ($street, $city, $state, $zip, $country)
#   $state   - the entry of the state table that was found in the address
#              (matched case-insensitively, and only when digits follow it)
#   $zip     - US: everything after the last occurrence of the state;
#              otherwise the digits (optionally dash-separated) that directly
#              precede the country name
#   $city    - last comma-separated piece, containing a word character, of
#              the text in front of the state (US) or of the postal code
#   $street  - that same text with the city removed and commas turned into
#              blanks
#   $country - set only for the countries listed in @countries
# Components that cannot be found are returned undefined; the pieces are not
# trimmed.
#
sub getAddressParts {
    my($address) = @_;

    # countries recognised as non-US addresses
    my @countries = ('Liechtenstein');

    # State names and abbreviations, tried in this order; the first one found
    # wins.  'West Virginia' has to come before 'Virginia', which would
    # otherwise match inside it.  The misspelled names that used to be the
    # only full-name entries are kept next to the correct spellings.
    my @states = (
        'New Jersey', 'NJ', 'New York', 'NY', 'California', 'CA',
        'West Virginia', 'WV', 'Virginia', 'VA', 'Nebraska', 'NE',
        'Florida', 'FL', 'Oklahoma', 'OK', 'Massachusetts', 'MA',
        'Utah', 'UT', 'ILLINOIS', 'IL', 'Kentucky', 'KY', 'Georgia', 'GA',
        'Alabama', 'AL', 'Connecticut', 'Connecticutt', 'CT', 'Texas', 'TX',
        'Pennsylvania', 'PA', 'Delaware', 'DE', 'Mississippi', 'MS',
        'Minnesota', 'MN', 'Iowa', 'IA', 'Missouri', 'MO', 'Wisconsin', 'WI',
        'Maryland', 'MD', 'North Carolina', 'NC', 'South Carolina', 'SC',
        'Indiana', 'IN', 'Ohio', 'OH', 'North Dakota', 'ND',
        'South Dakota', 'SD', 'Washington', 'WA', 'Oregon', 'OR',
        'Arizona', 'AZ', 'Alaska', 'AK', 'Vermont', 'VT',
        'New Hampshire', 'NH', 'Tennessee', 'Tennesse', 'TN',
        'Louisiana', 'Lousiana', 'LA', 'Kansas', 'KS', 'Arkansas', 'AR',
        'Wyoming', 'WY', 'Colorado', 'CO', 'Idaho', 'ID', 'Michigan', 'MI',
        'Nevada', 'NV', 'Hawaii', 'Hawai', 'HI', 'Maine', 'ME',
        'Montana', 'MT', 'DC', 'New Mexico', 'NM', 'Rhode Island', 'RI',
    );

    my($state, $street, $city, $country, $zip);

    $address =~ s/_{3,}//gsi;

    # a state only counts when digits (the zip code) follow it
    foreach my $st (@states) {
        if ($address =~ /\b$st\b\s*\d+/i) {
            $state = $st;
            last;
        }
    }

    # Check if international
    foreach my $c (@countries) {
        if ($address =~ /\Q$c\E/) {
            $country = $c;
        }
    }

    # Everything interpolated into a pattern below is quoted first: the zip
    # text comes straight from the document and may hold characters such as
    # "(" or "+" that would change, or break, the pattern.
    my $stateRe = defined($state) ? quotemeta($state) : '';
    my $zipRe;

    # United States
    # ($country is undefined when no country matched.  length(undef) is undef
    # from perl 5.12 on, so the former test "length($country) eq 0" was never
    # true there and US addresses fell into the international branch.)
    if (!defined($country) || $country eq '') {
        # zip code: whatever follows the last occurrence of the state
        ($zip) = ($address =~ /.*$stateRe(.*)/si);
        $zipRe = defined($zip) ? quotemeta($zip) : '';
        # street and city: whatever precedes that state
        ($street) = ($address =~ /(.*)$stateRe.*$zipRe/si);
    } else {
        my $countryRe = quotemeta($country);
        # postal code: the digits directly in front of the country name.  No
        # leading ".*" here -- being greedy it used to swallow the digits and
        # leave the capture empty.
        ($zip) = ($address =~ /(\d+(?:-+\d+)?)\W*$countryRe/si);
        $zipRe = defined($zip) ? quotemeta($zip) : '';
        ($street) = ($address =~ /(.*).*$zipRe.*$countryRe/si);
    }

    # city: last comma-separated piece that contains a word character
    if (defined($street)) {
        foreach my $piece (reverse split(/,/, $street)) {
            if ($piece =~ /\w+/) {
                $city = $piece;
                last;
            }
        }
    }

    # Remove the city from the street text.  Skipped when no city was found:
    # a pattern that interpolates to the empty string makes perl re-use the
    # last successfully matched pattern instead.
    if (defined($city) && $city ne '') {
        $street =~ s/\Q$city\E//gsi;
    }

    # $city cannot contain a comma (it is a piece of a split on commas)
    $street =~ s/,/ /gsi if defined($street);
    $zip    =~ s/,/ /gsi if defined($zip);

    return ($street, $city, $state, $zip, $country);
}

#
# Get Footer
#
# Scan the footer text line by line for a signature date and a signature and
# return the signature markup followed by the signature date XML.
#
sub getSCFooter {
    local($footer) = @_;
    local(@footers) = split(/\n/, $footer);
    local($footindex) = 0;      # index of the line being examined
    local($datedone) = 0;       # set once a date line has been handled
    local($line, $foundDate, $month, $day, $year, $out, $start, $end, $dash, $foundSig);
    local($sigDateXML, $sigXML);

    foreach $line(@footers) {
        # Look for Dated: -------
        if ($line =~ /Dated:/si && $datedone eq 0) {
            ($foundDate) = ($line =~ /Dated:(.*)/si);
            ($month, $day, $year) = EDGARdoc::util::getDateParts($foundDate);
            if (($month =~ /\w+/) && ($year =~ /\w+/)) {
                $sigDateXML = EDGARdoc::util::getDateXML("signatureDate", $month, $day, $year);
            }
            $datedone = 1;
        } elsif ($line =~ /Date:/si && $datedone eq 0) {
            ($foundDate) = ($line =~ /Date:(.*)/si);
            ($month, $day, $year) = EDGARdoc::util::getDateParts($foundDate);
            if (($month =~ /\w+/) && ($year =~ /\w+/)) {
                $sigDateXML = EDGARdoc::util::getDateXML("signatureDate", $month, $day, $year);
            }
            $datedone = 1;
        # Look for .......\n--------------\nDate
        } elsif ($line =~ /Date/ && $datedone eq 0) {
            # Check for existence of ----------
            # NOTE(review): as written the next statements always leave
            # $start at 0 and double $end -- confirm this is the intended
            # column window.  On the first two lines the negative indexes
            # below address lines counted from the end of the footer.
            $start = index($line, 'Date', 0);
            $end = $start + 8;
            $start -= $start;
            $end += $end;
            if ($start < 0) {
                $start = 0;
            }
            # the line above must be a rule of dashes, the one above that
            # holds the date text
            ($dash) = substr($footers[$footindex-1], $start, $end - $start);
            if ($dash =~ /-{3,}/) {
                ($foundDate) = substr($footers[$footindex-2], $start, $end-$start);
                ($month, $day, $year) = EDGARdoc::util::getDateParts($foundDate);
                if (($month =~ /\w+/) && ($year =~ /\w+/)) {
                    $sigDateXML = EDGARdoc::util::getDateXML("signatureDate", $month, $day, $year);
                }
                $datedone = 1;
            }
        # Look for .......\n------------------\n(Date)
        # NOTE(review): the /Date/ test above also matches every line that
        # contains "(Date)", so this branch looks unreachable -- confirm.
        } elsif ($line =~ /\(Date\)/ && $datedone eq 0) {
            # 
# Check for existence of ----------
            $start = index($line, '(Date)', 0);
            $end = $start + 8;
            $start -= $start;
            $end += $end;
            if ($start < 0) {
                $start = 0;
            }
            ($dash) = substr($footers[$footindex-1], $start, $end - $start);
            if ($dash =~ /-{3,}/) {
                ($foundDate) = substr($footers[$footindex-2], $start, $end-$start);
                ($month, $day, $year) = EDGARdoc::util::getDateParts($foundDate);
                if (($month =~ /\w+/) && ($year =~ /\w+/)) {
                    $sigDateXML = EDGARdoc::util::getDateXML("signatureDate", $month, $day, $year);
                }
                $datedone = 1;
            }
        }

        # signature: text following "/s/" or "\s\", or enclosed in --// //--
        if ($line =~ /\/s\//si) {
            ($foundSig) = ($line =~ /\/s\/(.*)/si);
            # keep only what precedes the first run of three or more blanks
            $foundSig =~ s/\s{3,}.*//gsi;
        } elsif ($line =~ /\\s\\/si) {
            ($foundSig) = ($line =~ /\\s\\(.*)/si);
        } elsif ($line =~ /--\/\/(.*)\/\/--/si) {
            ($foundSig) = ($line =~ /--\/\/(.*)\/\/--/si);
        }
        # $foundSig is not cleared between lines, so a signature found on an
        # earlier line is formatted again here
        if ($foundSig =~ /\w+/) {
            $sigXML = "" . EDGAR::XML::trim($foundSig) . "\n";
        }
        $footindex++;
    }
    $out = $sigXML . $sigDateXML;
    return $out;
}

#
# flatten one "Item N ... Item N+1" section of a SC 13G to a single line:
# newlines become blanks, rules of three or more dashes or underscores are
# dropped, and runs of whitespace collapse to one blank.  An undefined
# section (heading not found) stays undefined.
#
sub _flattenSC13GSection {
    my($section) = @_;
    return $section unless defined($section);
    $section =~ s/\n/ /gsi;
    $section =~ s/-{3,}//gsi;
    $section =~ s/_{3,}//gsi;
    $section =~ s/\s{2,}/ /gsi;
    return $section;
}

#
# get items section
#
# Pull the answers to the numbered items out of the text of a SC 13G.
#
# argument: $newtext - document text that holds the "Item n" sections
# returns:  a list of twelve strings, in the order in which edgarParseSC13G
#           unpacks them:
#             ($item1a, $item2a, $item2b, $item2c, $item2d,
#              $otherReceiver,     - item 6
#              $ownershipCeased,   - item 5
#              $roleCheck,         - item 3
#              $subsidiary,        - item 7
#              $groupId,           - item 8
#              $dissolution,       - item 9
#              $certification)     - item 10
#           An entry is undefined when its section was not found.
#
sub getItemsSC13G {
    my($newtext) = @_;
    my($item1a, $item2a, $item2b, $item2c, $item2d, $item6, $item5, $item3);
    my($item7, $item8, $item9, $item10, $certification, $dissolution);
    my($subsidiary, $roleCheck, $ownershipCeased, $otherReceiver, $groupId);

    # NOTE(review): the pattern of this substitution is empty as it stands
    # here; presumably it named markup to strip -- confirm against the
    # original source.  Left unchanged.
    $newtext =~ s###sg;

    # Item 1(a): text between "a)" and "b)" with the boilerplate captions removed
    ($item1a) = ($newtext =~ /(Item 1.*?\(*?a\).*?\(*?b\))/si);
    ($item1a) = ($item1a =~ /a\)(.*)\(*b\)/si);
    $item1a =~ s/The name of the issuer is//gsi;
    $item1a =~ s/The name of the issuer//gsi;
    $item1a =~ s/The issuer to which this statement relates is//gsi;
    $item1a =~ s/Name of issuer//gsi;
    $item1a =~ s/ITEM.*//gsi;
    $item1a =~ s/\n|:/ /gsi;
    $item1a =~ s/-{2,}//gsi;
    $item1a =~ s/_{2,}//gsi;
    $item1a =~ s/\s{2,}/ /gsi;

    # Item 2(a)
    ($item2a) = ($newtext =~ /(Item 2.*?\(*?a\).*?\(*?b\))/si);
    ($item2a) = ($item2a =~ /a\)(.*)\(*b\)/si);
    $item2a =~ s/Name of person\(*s*\)* filing//gsi;
    $item2a =~ s/The reporting person is//gsi;
    $item2a =~ s/This statement is filed on behalf of//gsi;
    $item2a =~ s/Names* of persons* filing//gsi;
    $item2a =~ s/ITEM.*//gsi;
    $item2a =~ s/\n|:/ /gsi;
    $item2a =~ s/-{2,}//gsi;
    $item2a =~ s/_{2,}//gsi;
    $item2a =~ s/\s{2,}/ /gsi;

    # Item 2(b): an address, so line breaks become commas
    ($item2b) = ($newtext =~ /(Item 2.*?\(*?b\).*?\(*?c\))/si);
    ($item2b) = ($item2b =~ /b\)(.*)\(*c\)/si);
    $item2b =~ s/\n/,/gsi;
    $item2b =~ s/:|\(|\)/ /gsi;
    $item2b =~ s/-{2,}//gsi;
    $item2b =~ s/\s{2,}/ /gsi;
    $item2b =~ s/ITEM.*//gsi;
    $item2b =~ s/The residence address.*?is//gsi;
    $item2b =~ s/Address.*Principal.*?Residence//gsi;
    $item2b =~ s/Address.*Business Offices*//gsi;

    # Item 2(c)
    ($item2c) = ($newtext =~ /(Item 2.*?\(*?c\).*?\(*?d\))/si);
    ($item2c) = ($item2c =~ /c\)(.*)\(*d\)/si);
    $item2c =~ s/Citizenship//gsi;
    $item2c =~ s/ITEM.*//gsi;
    $item2c =~ s/\n|:/ /gsi;
    $item2c =~ s/-{2,}//gsi;
    $item2c =~ s/_{2,}//gsi;
    $item2c =~ s/\s{2,}/ /gsi;

    # Item 2(d)
    ($item2d) = ($newtext =~ /(Item 2.*?\(*?d\).*?\(*?e\))/si);
    ($item2d) = ($item2d =~ /d\)(.*)\(*e\)/si);
    $item2d =~ s/Title of Class of securities//gsi;
    $item2d =~ s/ITEM.*//gsi;
    $item2d =~ s/\n|:/ /gsi;
    $item2d =~ s/-{2,}//gsi;
    $item2d =~ s/_{2,}//gsi;
    $item2d =~ s/\s{2,}/ /gsi;

    # Items 3 and 5-10: each section runs from its own heading to the next
    # one; the answer is what follows the section caption.  All of them go
    # through the same flattening -- the copy for item 5 used to carry the
    # pattern \s{2, }, which is not a quantifier on perls before 5.34, so its
    # whitespace was not collapsed there.
    ($item6) = ($newtext =~ /(Item 6.*?Item 7)/si);
    $item6 = _flattenSC13GSection($item6);
    ($otherReceiver) = ($item6 =~ /.*Ownership.*Another Person.(.*)Item/si);

    ($item5) = ($newtext =~ /(Item 5.*?Item 6)/si);
    $item5 = _flattenSC13GSection($item5);
    ($ownershipCeased) = ($item5 =~ /.*Ownership.*of a Class.(.*)Item/si);

    ($item3) = ($newtext =~ /(Item 3.*?Item 4)/si);
    $item3 = _flattenSC13GSection($item3);
    ($roleCheck) = ($item3 =~ /.*Type of Reporting person.(.*)Item/si);
    $roleCheck =~ s/See//gsi;
    $roleCheck =~ s/Instructions//gsi;

    ($item7) = ($newtext =~ /(Item 7.*?Item 8)/si);
    $item7 = _flattenSC13GSection($item7);
    ($subsidiary) = ($item7 =~ /.*Identification and.*Holding Company.(.*)Item/si);

    ($item8) = ($newtext =~ /(Item 8.*?Item 9)/si);
    $item8 = _flattenSC13GSection($item8);
    ($groupId) = ($item8 =~ /.*Identification and Classification.*Group.(.*)Item/si);

    ($item9) = ($newtext =~ /(Item 9.*?Item 10)/si);
    $item9 = _flattenSC13GSection($item9);
    ($dissolution) = ($item9 =~ /.*Notice of.*Group.(.*)Item/si);

    # Item 10 runs up to the word "signature"; page footers are removed first
    ($item10) = ($newtext =~ /(Item 10.*?signature)/si);
    $item10 =~ s/Page . of . pages//gsi;
    $item10 =~ s/signature//gsi;
    $item10 = _flattenSC13GSection($item10);
    ($certification) = ($item10 =~ /.*Certification.(.*)/si);

    # $subsidiary used to be missing from this list although the caller
    # unpacks it between $roleCheck and $groupId; every later value landed in
    # the wrong variable and $certification was lost.
    return($item1a, $item2a, $item2b, $item2c, $item2d, $otherReceiver,
           $ownershipCeased, $roleCheck, $subsidiary, $groupId,
           $dissolution, $certification);
}

#
# Pull the answers to the numbered items out of the text of a SC 13D.
#
# argument: $newtext - document text that holds the "Item n" sections
# returns:  ($item2a, $item2b, $item2c, $item2d, $item2e, $item2f,
#            $item5b, $item5d, $item5e,
#            $considerationSource,  - item 3
#            $purpose,              - item 4
#            $contracts)            - item 6
#
sub getItemsSC13D {
    local($newtext) = @_;
    local($item2a, $item2b, $item2c, $item2d, $item2e, $item2f);
    local($item5b, $item5d, $item5e, $item3, $item4, $item6);
    # $contracts is the variable this function fills and returns; the list
    # used to name $contact, which left $contracts unlocalised
    local($considerationSource, $purpose, $contracts);

    # Item 2(a): text between "a)" and "b)"
    ($item2a) = ($newtext =~ /(Item 2.*?\(*?a\).*?\(*?b\))/si);
    ($item2a) = ($item2a =~ /a\)(.*)\(*b\)/si);
    $item2a =~ s/This schedule.*?by//gsi;
    $item2a =~ s/ITEM.*//gsi;
    $item2a =~ s/\n|:/ /gsi;
    $item2a =~ s/-{2,}//gsi;
    $item2a =~ s/_{2,}//gsi;
    $item2a =~ s/\s{2,}/ /gsi;

    # Item 2(b): an address, so line breaks become commas
    ($item2b) = ($newtext =~ /(Item 2.*?\(*?b\).*?\(*?c\))/si);
    ($item2b) = ($item2b =~ /b\)(.*)\(*c\)/si);
    $item2b =~ s/\n/,/gsi;
    $item2b =~ s/:|\(|\)/ /gsi;
    $item2b =~ s/-{2,}//gsi;
    $item2b =~ s/\s{2,}/ /gsi;
    $item2b =~ s/ITEM.*//gsi;
    $item2b =~ s/The address of.*?is//gsi;

    # Item 2(c)
    ($item2c) = ($newtext =~ /(Item 2.*?\(*?c\).*?\(*?d\))/si);
    ($item2c) = ($item2c =~ /c\)(.*)\(*d\)/si);
    $item2c =~ s/\n|:/ /gsi;
    $item2c =~ s/-{2,}//gsi;
    $item2c =~ s/_{2,}//gsi;
    $item2c =~ s/\s{2,}/ /gsi;

    # Item 2(d)
    ($item2d) = ($newtext =~ /(Item 2.*?\(*?d\).*?\(*?e\))/si);
    ($item2d) = ($item2d =~ /d\)(.*)\(*e\)/si);
    $item2d =~ s/\n|:/ /gsi;
    $item2d =~ s/-{2,}//gsi;
    $item2d =~ s/_{2,}//gsi;
    $item2d =~ s/\s{2,}/ /gsi;

    # Item 2(e)
    ($item2e) = ($newtext =~ /(Item 2.*?\(*?e\).*?\(*?f\))/si);
($item2e) = ($item2e =~ /e\)(.*)\(*f\)/si); $item2e =~ s/\n|:/ /gsi; $item2e =~ s/-{2,}//gsi; $item2e =~ s/_{2,}//gsi; $item2e =~ s/\s{2,}/ /gsi; ($item2f) = ($newtext =~ /(Item 2.*?\(*?f\).*?Item)/si); ($item2f) = ($item2f =~ /f\)(.*)Item/si); $item2f =~ s/\n|:/ /gsi; $item2f =~ s/-{2,}//gsi; $item2f =~ s/_{2,}//gsi; $item2f =~ s/\s{2,}/ /gsi; ($item5b) = ($newtext =~ /(Item 5.*?\(*?b\).*?\(*?c\))/si); ($item5b) = ($item5b =~ /b\)(.*)\(*c\)/si); $item5b =~ s/\n|:/ /gsi; $item5b =~ s/-{2,}//gsi; $item5b =~ s/_{2,}//gsi; $item5b =~ s/\s{2,}/ /gsi; ($item5d) = ($newtext =~ /(Item 5.*?\(*?d\).*?\(*?e\))/si); ($item5d) = ($item5d =~ /d\)(.*)\(*e\)/si); $item5d =~ s/\n|:/ /gsi; $item5d =~ s/-{2,}//gsi; $item5d =~ s/_{2,}//gsi; $item5d =~ s/\s{2,}/ /gsi; ($item5e) = ($newtext =~ /(Item 5.*?\(*?e\).*?Item)/si); ($item5e) = ($item5d =~ /e\)(.*)Item/si); $item5e =~ s/\n|:/ /gsi; $item5e =~ s/-{2,}//gsi; $item5e =~ s/_{2,}//gsi; $item5e =~ s/\s{2,}/ /gsi; ($item3) = ($newtext =~ /(Item 3.*?Item 4)/si); $item3 =~ s/\n/ /gsi; $item3 =~ s/-{3,}//gsi; $item3 =~ s/_{3,}//gsi; $item3 =~ s/\s{2,}/ /gsi; ($considerationSource) = ($item3 =~ /.*Source and Amount of funds or other consideration.(.*)Item/si); ($item4) = ($newtext =~ /(Item 4.*?Item 5)/si); $item4 =~ s/\n/ /gsi; $item4 =~ s/-{3,}//gsi; $item4 =~ s/_{3,}//gsi; $item4 =~ s/\s{2,}/ /gsi; ($purpose) = ($item4 =~ /.*Purpose of Transaction.(.*)Item/si); ($item6) = ($newtext =~ /(Item 6.*?Signature|Item 7)/si); $item6 =~ s/\n/ /gsi; $item6 =~ s/-{3,}//gsi; $item6 =~ s/_{3,}//gsi; $item6 =~ s/\s{2,}/ /gsi; ($contracts) = ($item6 =~ /.*Contracts.*Respect.*the Issuer.(.*)Signature|Item 7/si); return($item2a, $item2b, $item2c, $item2d, $item2e, $item2f, $item5b, $item5d, $item5e, $considerationSource, $purpose, $contracts); } # parse document of type sc13g sub edgarParseSC13G { local($text) = @_; local($amendmentNumber, $lineindex, $line, $issuer_b2, $securityTitle, $cusip, $eventDate, $rule, $ruleb, $rulec, $ruled, $coverdone, 
$coverindex, $newtext, $cover, $filer, $gr, $groupA, $groupB, $sharesOwned, $roleCheck, $subsidiary, $ownershipCeased, $otherReceiver, $groupId, $item1a, $item2a, $item2b, $item2c, $item2d, $dissolution, $certification, $footer, $index, $XMLout, $month, $day, $year, $firstname, $middlename, $lastname, $ro, $rodata, $street, $city, $state, $zip, $country); local(@lines, @irsId, @filerName, @group, @secOnly, @citizenship, @soleVote, @sharedVote, @soleDispositive, @sharedDispositive, @aggregate, @role, @classPercent, @roles); # Remove & / < / " tags $text = &EDGAR::XML::decode($text); $text = &EDGARdoc::util::fixText($text); # Remove page m of n pages $text =~ s/page \d+ of \d+ pages//gsi; # Remove tags $text =~ s///gsi; $text =~ s/<\/document\.item>//gsi; # Parsing Header section # Getting Amendment Number ($amendmentNumber) = ($text =~ /Amendment No.(.*?\))/); $amendmentNumber =~ s/\)//gsi; # Getting name of issuer @lines = split(/\n/, $text); $lineindex = 0; foreach $line(@lines) { if($line =~ /Name of Issuer/i) { $issuer_b2 = ""; if($lines[$lineindex-2] !~ /Amendment|Schedule/i) { $issuer_b2 .= $lines[$lineindex-2]; } $issuer_b2 .= $lines[$lineindex-1]; last; } $lineindex++; } $issuer_b2 =~ s/-{3,}//gsi; $issuer_b2 =~ s/\s{2,}//gsi; $issuer_b2 =~ s/\n//gsi; # Getting Title of Class of securities ($securityTitle) = ($text =~ /Name of Issuer(.*?Title)/si); $securityTitle =~ s/Title//gsi; $securityTitle =~ s/-{3,}//gsi; $securityTitle =~ s/\s{2,}//gsi; $securityTitle =~ s/\n//gsi; $securityTitle =~ s/\(//gsi; $securityTitle =~ s/\)//gsi; # Getting CUSIP Number $lineindex = 0; foreach $line(@lines) { if($line =~ /CUSIP Number/i) { if($lines[$lineindex-2] !~ /Title/) { $cusip .= $lines[$lineindex-2]; } $cusip .= $lines[$lineindex-1]; last; } $lineindex++; } $cusip =~ s/-{3,}//gsi; $cusip =~ s/\s{2,}//gsi; $cusip =~ s/\n//gsi; # Getting Date of event ($eventDate) = ($text =~ /CUSIP Number(.*)Date of Event/si); $eventDate =~ s/-{3,}//gsi; $eventDate =~ s/\s{2,}//gsi; 
$eventDate =~ s/\n//gsi; $eventDate =~ s/\(//gsi; $eventDate =~ s/\)//gsi; # Getting Rule checkbox ($rule) = ($text =~ /Check\s+the\s+appropriate\s+box\s+to\s+designate(.*?1.*NAMES* OF REPORTING PERSON)/si); ($ruleb) = ($rule =~ /(.*Rule.*\(b\))/i); $ruleb =~ s/Rule.*(\(b\))/$1/i; $ruleb =~ s/\(|\)//gsi; $ruleb = &EDGARdoc::util::isChecked("b", $ruleb); ($rulec) = ($rule =~ /(.*Rule.*\(c\))/i); $rulec =~ s/Rule.*(\(c\))/$1/i; $rulec =~ s/\(|\)//gsi; $rulec = &EDGARdoc::util::isChecked("c", $rulec); ($ruled) = ($rule =~ /(.*Rule.*\(d\))/i); $ruled =~ s/Rule.*(\(d\))/$1/i; $ruled =~ s/\(|\)//gsi; $ruled = &EDGARdoc::util::isChecked("d", $ruled); $rule =""; if($ruleb) { $rule = "b"; } if($rulec) { $rule = "c"; } if($ruled) { $rule = "d"; } # Parsing cover section $coverdone = 0; $coverindex = 0; $newtext = $text; while($coverdone eq 0) { if($newtext =~ /1.*NAMES* OF REPORTING PERSON.*12.*TYPE OF REPORTING/si) { ($cover, $newtext) = ($newtext =~ /(1.*?NAMES* OF REPORTING PERSON.*?12.*?TYPE OF REPORTING.*?person.*?\n.*?\n)(.*)/si); if ($debug > 1) { print $cover, "\n"; } # Getting Filer Name ($filer) = ($cover =~ /1.*NAMES* OF REPORTING PERSONS*(.*)2.*CHECK THE APPROPRIATE/si); $filer =~ s/I\.*R\.*S\.*//gsi; $filer =~ s/\bIdentification\b//gsi; $filer =~ s/\bNos*\b|\bNumbers*\b//gsi; $filer =~ s/\bOF\b//si; $filer =~ s/\bABOVE\b//si; $filer =~ s/\bPERSONS*\b//gsi; $filer =~ s/\/|,|:/ /gsi; $filer =~ s/S\.*S\.* or//gsi; $filer =~ s/\(ENTITIES ONLY\)//gsi; $filer =~ s/-{3,}//gsi; ($irsId[$coverindex]) = ($filer =~ /(\d*-*)/); $filer =~ s/$irsId[$coverindex]//gsi; $filerName[$coverindex] = $filer; $filerName[$coverindex] =~ s/\.//gsi; # Getting group ($gr) = ($cover =~ /2.*CHECK THE APPROPRIATE BOX(.*)3.*SEC USE ONLY/si); $gr =~ s/If//gsi; $gr =~ s/a member//gsi; $gr =~ s/of//gsi; $gr =~ s/group//gsi; $gr =~ s/See//gsi; $gr =~ s/Instructions*//gsi; $gr =~ s/-{3,}//gsi; $gr =~ s/\s{2,}//gsi; $gr =~ s/\n//gsi; ($groupA, $groupB) = ($gr =~ /(\(a\).*)(\(b\).*)/si); $groupA =~ 
s/\(|\)//gsi; $groupB =~ s/\(|\)//gsi; $groupA .= "a"; $groupB .= "b"; $groupA = &EDGARdoc::util::isChecked("a", $groupA); $groupB = &EDGARdoc::util::isChecked("b", $groupB); if($groupA eq $groupB) { $group[$coverindex] = ""; } else { if($groupA) {$group[$coverindex] = "a"; } if($groupB) {$group[$coverindex] = "b"; } } # Getting SEC USE ONLY ($secOnly[$coverindex]) = ($cover =~ /3.*SEC USE ONLY(.*)4.*CITIZENSHIP OR PLACE OF ORGANIZATION/si); # Getting Citizenship ($citizenship[$coverindex]) = ($cover =~ /4.*CITIZENSHIP OR PLACE OF ORGANIZATION(.*)?5.*SOLE VOTING POWER/si); $citizenship[$coverindex] =~ s/NUMBER\s*OF.*//gsi; # Getting Number of shares beneficially owned ($sharesOwned) = ($cover =~ /(5.*SOLE VOTING POWER.*)9.*AGGREGATED* AMOUNT BENEFICIALLY/si); $sharesOwned =~ s/NUMBER//gsi; $sharesOwned =~ s/OF//gsi; $sharesOwned =~ s/SHARES//gsi; $sharesOwned =~ s/BENEFICIALLY//gsi; $sharesOwned =~ s/OWNED//gsi; $sharesOwned =~ s/BY//gsi; $sharesOwned =~ s/EACH//gsi; $sharesOwned =~ s/REPORTING//gsi; $sharesOwned =~ s/PERSON//gsi; $sharesOwned =~ s/WITH//gsi; ($soleVote[$coverindex]) = ($sharesOwned =~ /5.*SOLE VOTING POWER(.*)6.*SHARED VOTING POWER/si); ($sharedVote[$coverindex]) = ($sharesOwned =~ /6.*SHARED VOTING POWER(.*)7.*SOLE DISPOSITIVE POWER/si); ($soleDispositive[$coverindex]) = ($sharesOwned =~ /7.*SOLE DISPOSITIVE POWER(.*)8.*SHARED DISPOSITIVE POWER/si); ($sharedDispositive[$coverindex]) = ($sharesOwned =~ /8.*SHARED DISPOSITIVE POWER(.*)/si); # Getting Aggregate amount ($aggregateAmount[$coverindex]) = ($cover =~ /9.*AGGREGATE AMOUNT BENEFICIALLY.*REPORTING.*?PERSON(.*)10.*CHECK.*IF THE AGGREGATE/si); # Getting AggregateCheck ($aggregate[$coverindex]) = ($cover =~ /10.*CHECK BOX IF THE AGGREGATE.*CERTAIN(.*)11.*PERCENT OF CLASS/si); ($aggregate[$coverindex]) = ($aggregate[$coverindex] =~ /(\bX\b)/si); $aggregate[$coverindex] .= "Shares"; $aggregate[$coverindex] = &EDGARdoc::util::isChecked("Shares", $aggregate[$coverindex]); 
if($aggregate[$coverindex]) { $aggregate[$coverindex] = "Y"; } else { $aggregate[$coverindex] = "N"; } # Percent of Class ($classPercent[$coverindex]) = ($cover =~ /11.*PERCENT OF CLASS.*ROW \(*9\)*(.*)12.*TYPE OF REPORTING/si); # Role ($role[$coverindex]) = ($cover =~ /12.*TYPE OF REPORTING.*PERSON(.*)/si); $role[$coverindex] =~ s/See//gsi; $role[$coverindex] =~ s/Instructions//gsi; $coverindex++; } else { $coverdone = 1; } } # Check for different structure of cover section if($coverindex eq 0) { $coverdone = 0; $newtext = $text; while($coverdone eq 0) { if($newtext =~ /Item 1:.*Item 12:/si) { ($cover, $newtext) = ($newtext =~ /Item 1:(.*?Item 12:.*?\n)(.*)/si); ($filerName[$coverindex]) = ($cover =~ /Reporting Person - (.*)/i); ($group[$coverindex]) = ($cover =~ /Item 2:(.*)/i); ($citizenship[$coverindex]) = ($cover =~ /Item 4:(.*)/i); ($soleVote[$coverindex]) = ($cover =~ /Item 5:(.*)/i); ($sharedVote[$coverindex]) = ($cover =~ /Item 6:(.*)/i); ($soleDispositive[$coverindex]) = ($cover =~ /Item 7:(.*)/i); ($sharedDispositive[$coverindex]) = ($cover =~ /Item 8:(.*)/i); ($aggregateAmount[$coverindex]) = ($cover =~ /Item 9:(.*)/i); ($aggregate[$coverindex]) = ($cover =~ /Item 10:(.*)/i); ($classPercent[$coverindex]) = ($cover =~ /Item 11:(.*)/i); ($role[$coverindex]) = ($cover =~ /Item 12:(.*)/i); $coverindex++; } else { $coverdone = 1; } } } # Parsing Items Section ($item1a, $item2a, $item2b, $item2c, $item2d, $otherReceiver, $ownershipCeased, $roleCheck, $subsidiary, $groupId, $dissolution, $certification) = &getItemsSC13G($newtext); # Parsing Footer Section ($footer) = ($text =~ /Item 10(.*)/si); if ($debug > 1) { print "---------------------HEADER-----------------------------\n"; print "Amendment Number = $amendmentNumber\n"; print "Issuer Name = $issuer_b2\n"; print "Title of securities = $securityTitle\n"; print "CUSIP Number = $cusip\n"; print "Event Date = $eventDate\n\n\n\n"; print "--------------------COVER-------------------------------\n"; for($index = 
0; $index < $coverindex; $index++) { print "Filer Name = $filerName[$index]\n"; print "IRS ID = $irsId[$index]\n"; print "Group = $group[$index]\n"; print "SEC Use Only = $secOnly[$index]\n"; print "Citizenship = $citizenship[$index]\n"; print "Sole Voting Power = $soleVote[$index]\n"; print "Shared Voting Power = $sharedVote[$index]\n"; print "Sole Dispositive Power = $soleDispositive[$index]\n"; print "Shared Dispositive Power = $sharedDispositive[$index]\n"; print "Aggregate Amount = $aggregateAmount[$index]\n"; print "Aggregate = $aggregate[$index]\n"; print "Class Percent = $classPercent[$index]\n"; print "Role = $role[$index]\n\n\n\n"; } print "----------------------ITEM--------------------------------\n"; print "Address = $item2b\n"; print "Other Receiver = $otherReceiver\n"; print "Ownership Ceased = $ownershipCeased\n"; print "Role Check = $roleCheck\n"; print "Subsidiary = $subsidiary\n"; print "Group ID = $groupId\n"; print "Dissolution = $dissolution\n"; print "Certification = $certification\n"; } $XMLout = "\n"; $XMLout .= "\n"; if($amendmentNumber =~ /\w+/) { $XMLout .= "" . EDGAR::XML::trim($amendmentNumber); $XMLout .= "\n"; } $XMLout .= "\n"; $XMLout .= "" . EDGAR::XML::trim($securityTitle) ."\n"; ($month, $day, $year) = &EDGARdoc::util::getDateParts($eventDate); $XMLout .= &EDGARdoc::util::getDateXML("eventDate", $month, $day, $year); $XMLout .= "\n"; for($index=0; $index < $coverindex; $index++) { $XMLout .= "\n"; $XMLout .= "" . EDGAR::XML::trim($cusip) . "\n"; ($filerName[$index]) = ($filerName[$index] =~ /\s*(.*)/); # ($firstname, $middlename, $lastname) = split(/\s+/, $filerName[$index]); ($firstname, $middlename, $lastname) = ($filerName[$index] =~ /(.*)\s(.*)\s(.*)/); if(! ($lastname =~ /\w+/)) { $lastname = $middlename; $middlename = ""; } if(! 
($firstname =~ /\w+/ && $lastname =~ /\w+/)) { ($firstname, $lastname) = ($filerName[$index] =~ /(.*)\s(.*)/); } if (($firstname =~ /\w+/) && ($lastname =~ /\w+/)) { $XMLout .= &EDGARdoc::util::getNameXML("filer", $firstname, $middlename, $lastname); if ($irsId =~ /\w+/) { $XMLout .= "" . EDGAR::XML::trim($irsId[$index]) . "\n"; } $XMLout .= "\n"; } if ($secOnly =~ /\w+/) { $XMLout .= "" . EDGAR::XML::trim($secOnly) . "\n"; } $citizenship[$index] =~ s/\n|:/ /gsi; $citizenship[$index] =~ s/-{3,}//gsi; $citizenship[$index] =~ s/_{3,}//gsi; $citizenship[$index] =~ s/\s{2,}//gsi; $XMLout .= "" . EDGAR::XML::trim($citizenship[$index]); $XMLout .= "\n"; $soleVote[$index] =~ s/\n|:/ /gsi; $soleVote[$index] =~ s/-{3,}//gsi; $soleVote[$index] =~ s/_{3,}//gsi; $soleVote[$index] =~ s/\s{2,}//gsi; $sharedVote[$index] =~ s/\n|:/ /gsi; $sharedVote[$index] =~ s/-{3,}//gsi; $sharedVote[$index] =~ s/_{3,}//gsi; $sharedVote[$index] =~ s/\s{2,}//gsi; $soleDispositive[$index] =~ s/\n|:/ /gsi; $soleDispositive[$index] =~ s/-{3,}//gsi; $soleDispositive[$index] =~ s/_{3,}//gsi; $soleDispositive[$index] =~ s/\s{2,}//gsi; $sharedDispositive[$index] =~ s/\n|:/ /gsi; $sharedDispositive[$index] =~ s/-{3,}//gsi; $sharedDispositive[$index] =~ s/_{3,}//gsi; $sharedDispositive[$index] =~ s/\s{2,}//gsi; $soleVote[$index] = EDGARdoc::util::toNumber($soleVote[$index]); $XMLout .= "\n"; $XMLout .= "\n"; } $classPercent[$index] =~ s/\n|:/ /gsi; $classPercent[$index] =~ s/-{3,}//gsi; $classPercent[$index] =~ s/_{3,}//gsi; $classPercent[$index] =~ s/\s{2,}//gsi; $classPercent[$index] = EDGARdoc::util::toNumber($classPercent[$index]); if ($classPercent[$index] =~ /\w+/) { $XMLout .= "" . $classPercent[$index] . 
"\n"; } $role[$index] =~ s/\n|:/ /gsi; $role[$index] =~ s/-{3,}//gsi; $role[$index] =~ s/_{3,}//gsi; @roles = split(/\s/, $role[$index]); $XMLout .= "\n"; foreach $ro(@roles) { if($ro =~ /bd|bk|ic|iv|ia|ep|hc|sa|cp|co|pn|in|oo/si) { ($rodata) = ($ro =~ /\s*(bd|bk|ic|iv|ia|ep|hc|sa|cp|co|pn|in|oo)\s*/si); $XMLout .= "\n"; } } $XMLout .= "\n"; $XMLout .= "\n"; } $XMLout .= "\n"; $XMLout .= "" . EDGAR::XML::trim($item2d) . "\n"; $XMLout .= "\n"; ($item2a) = ($item2a =~ /\s*(.*)/); # ($firstname, $middlename, $lastname) = split(/\s+/, $item2a); ($firstname, $middlename, $lastname) = ($item2a =~ /(.*)\s(.*)\s(.*)/); if(! ($lastname =~ /\w+/)) { $lastname = $middlename; $middlename = ""; } if (($lastname =~ /\w+/) && ($firstname =~ /\w+/)) { $XMLout .= &EDGARdoc::util::getNameXML("filer", $firstname, $middlename, $lastname); ($street, $city, $state, $zip, $country) = &getAddressParts($item2b); if ($street =~ /\w+/) { $XMLout .= "
\n"; $XMLout .= EDGARdoc::util::getPostalXML($street, $city, $state, $zip, $country); $XMLout .= "
\n"; } $XMLout .= "\n"; } $XMLout .= "" . EDGAR::XML::trim($item2c) . "\n"; @roles = split(/\s/, $roleCheck); $rodat = ""; foreach $ro(@roles) { if($ro =~ /\bbd\b|\bbk\b|\bic\b|\biv\b|\bia\b|\bep\b|\bhc\b|\bsa\b| \bcp\b|\bco\b|\bpn\b|\bin\b|\boo\b/si) { ($rodata) = ($ro =~ /(\bbd\b|\bbk\b|\bic\b|\biv\b|\bia\b|\bep\b|\bhc\b|\bsa\b|\bcp\b|\bco\b|\bpn\b|\bin\b|\boo\b)/si); $rodat = $rodat . lc($rodata) . ","; } } chop($rodat); if (length($rodat)) { $XMLout .= "\n"; } $XMLout .= "" . EDGAR::XML::trim($cusip) . "\n"; $XMLout .= "" . EDGAR::XML::trim($otherReceiver) . "\n"; for($index = 0; $index<$coverindex; $index++) { $aggregateAmount[$index] = EDGARdoc::util::toNumber($aggregateAmount[$index]); $XMLout .= "\n"; } $ownershipCeased = &EDGARdoc::util::isChecked(".", $ownershipCeased); if ($ownershipCeased) { $ownershipCeased = "Y"; } else { $ownedshipCeased = "N"; } $XMLout .= "\n"; if ($subsidiary =~ /\w+/) { $XMLout .= "" . EDGAR::XML::trim($subsidiary) . "\n"; } if ($dissolution =~ /\w+/) { $XMLout .= "" . EDGAR::XML::trim($dissolution) . "\n"; } $XMLout .= "
\n"; $XMLout .= "
\n"; $XMLout .= &getSCFooter($footer); for($index = 0; $index < $coverindex; $index++) { ($firstName, $middleName, $lastName) = split(/\s+/, $filerName[$index]); if(length($lastName) eq 0) { $lastName = $middleName; $middleName = ""; } $XMLout .= EDGARdoc::util::getNameXML("filer", $firstName, $middleName, $lastName); $XMLout .= "\n"; } $XMLout .= "
\n"; $XMLout .= "
\n";

    if ($debug > 1) {
        print "----------------------------XML----------------------------\n";
        print $XMLout, "\n";
    }

    $XMLout = &EDGAR::XML::encode($XMLout);
    return $XMLout;
}

#
# parse document of type sc13d
#
# Takes the document text as its only argument and returns the value of
# EDGAR::XML::encode() applied to the XML string built up in $XMLout.
#
# Steps, in order:
#   1. run the text through EDGAR::XML::decode and EDGARdoc::util::fixText
#   2. header: amendment number, issuer name, security title, CUSIP,
#      contact, event date and two check-box sentences
#   3. cover page(s): filer name / IRS id, group, SEC-use-only, source of
#      funds, disclosure box, citizenship, voting / dispositive figures,
#      aggregate amount, aggregate box, percent of class, role
#   4. items, obtained from getItemsSC13D($newtext)
#   5. footer, handed to getSCFooter()
#
# Working variables are declared with local(), i.e. they are dynamically
# scoped and therefore visible to the subs called from here.
#
# NOTE(review): this copy of the sub appears to have been passed through
# something that removed markup.  Every string literal that presumably held
# an XML tag now holds only "" or "\n", several literals are split across
# physical lines, and at least two spans of code are missing (both marked
# below).  The tokens are reproduced exactly as found; line breaks that fall
# inside a string or pattern are kept where they occur.  Restore from a
# clean copy before relying on this sub.
#
# NOTE(review): @roles, $ro, $rodata, $firstname, $middlename and $lastname
# are assigned below but are not in the local() lists, so unless they are
# declared elsewhere they are package variables that outlive the call.
#
sub edgarParseSC13D {
    local($text) = @_;
    local($beneficiallyOwned) = "";
    local($amendmentNumber, $issuerName, $securityTitle, $cusip, $contact, $eventDate, $filed13G, $paidFee, $XMLout, $month, $year, $day, $coverdone, $cover, $newtext, $filerName, $req, $irsId, $group, $groupa, $groupb, $secOnly, $fundSource, $disclosure, $citizenship, $sharesOwned, $soleVote, $sharedVote, $soleDispositive, $sharedDispositive, $aggregateAmount, $aggregate, $role, $percentClass, $firstName, $middleName, $lastName, $item2a, $item2b, $item2c, $item2d, $item2e, $item2f, $item5b, $item5d, $item5e, $considerationSource, $purpose, $contracts, $street, $city, $state, $zip, $country, $check, $footer);
    local(@reqdata);

    # Removing unnecessary info
    $text = &EDGAR::XML::decode($text);
    $text = &EDGARdoc::util::fixText($text);

    # Parsing Header Section
    # Each header value is taken as the text lying between two of the
    # parenthesised captions of the form: "(Name of Issuer", "(Title of
    # Class", "(CUSIP Number", "(Name, Address and Telephone", "(Date of
    # Event".  After capture, runs of three or more dashes, runs of two or
    # more whitespace characters and newlines are deleted.

    # Getting the Amendment Number
    ($amendmentNumber) = ($text =~ /SCHEDULE 13D.*\(Amendment No.(.*?\))/si);
    $amendmentNumber =~ s/\)//;
    $amendmentNumber =~ s/\s{2,}//gsi;
    $amendmentNumber =~ s/\n//gsi;

    # Getting Name of Issuer
    # Two layouts: issuer follows the "(Amendment ..." line, or, failing
    # that, follows the words "Act of 1934".
    if($text =~ /Amendment.*?\n.*\(Name of Issuer/si) {
        ($issuerName) = ($text =~ /\(Amendment.*?\n(.*)\(Name of Issuer/si);
    } else {
        ($issuerName) = ($text =~ /Act of 1934(.*)\(Name of Issuer/si);
    }
    $issuerName =~ s/-{3,}//gsi;
    $issuerName =~ s/\s{2,}//gsi;
    $issuerName =~ s/\n//gsi;

    # Getting Title of Class of securities
    ($securityTitle) = ($text =~ /\(Name of Issuer.*?\n(.*)\(Title of Class/si);
    $securityTitle =~ s/-{3,}//gsi;
    $securityTitle =~ s/\s{2,}//gsi;
    $securityTitle =~ s/\n//gsi;

    # Getting CUSIP Number
    ($cusip) = ($text =~ /\(Title of Class.*?\n(.*)\(CUSIP Number/si);
    $cusip =~ s/-{3,}//gsi;
    $cusip =~ s/\s{2,}//gsi;
    $cusip =~ s/\n//gsi;

    # Getting Contact
    # NOTE(review): the pattern below contains a physical line break between
    # "and " and "Telephone"; without /x that break is matched literally.
    # It is probably an artifact of how this copy was wrapped - confirm.
    ($contact) = ($text =~ /\(CUSIP Number.*?\n(.*)\(Name, Address and 
Telephone/si);
    $contact =~ s/-{3,}//gsi;

    # Getting eventDate
    ($eventDate) = ($text =~ /\(Name, Address.*Communications\)?\n(.*)\(Date of Event/si);
    $eventDate =~ s/-{3,}//gsi;
    $eventDate =~ s/\s{2,}//gsi;
    $eventDate =~ s/\n//gsi;

    # Getting Check Box Header
    # Each check-box sentence is captured up to its first full stop and, if
    # it contains any word character, replaced by the result of isChecked().
    # (isChecked is defined elsewhere; the comparisons against 0 and 1
    # further down suggest it yields 0 or 1 - TODO confirm.)
    ($filed13G) = ($text =~ /(If the filing person.*?\.)/si);
    if($filed13G =~ /\w+/) {
        $filed13G = EDGARdoc::util::isChecked('.', $filed13G);
    }
    ($paidFee) = ($text =~ /(Check the following.*?\.)/si);
    if($paidFee =~ /\w+/) {
        $paidFee = EDGARdoc::util::isChecked('.', $paidFee);
    }

    if ($debug > 1) {
        print "------------------HEADER-----------------\n";
        print "Amendment Number = $amendmentNumber\n";
        print "Issuer Name = $issuerName\n";
        print "Security Title = $securityTitle\n";
        print "CUSIP Number = $cusip\n";
        print "Contact = $contact\n";
        print "Event Date = $eventDate\n";
        print "filed 13 G = $filed13G\n";
        print "Paid Fee = $paidFee\n\n\n\n";
        print "--------------------XML OUTPUT-----------------\n";
    }

    # XMLize the output
    $XMLout = "";
    $XMLout .= "\n";
    $XMLout .= "\n";
    if ($amendmentNumber =~ /\w+/) {
        $XMLout .= "" . EDGAR::XML::trim($amendmentNumber);
        $XMLout .= "\n";
    }
    $XMLout .= "\n";
    $XMLout .= "" . EDGAR::XML::trim($securityTitle) . "\n";
    ($month, $day, $year) = EDGARdoc::util::getDateParts($eventDate);
    $XMLout .= EDGARdoc::util::getDateXML("eventDate", $month, $day, $year);

    # NOTE(review): "eq" is a string comparison; it is used here (and in the
    # group tests below) against the numeric literals 0 and 1.
    if($filed13G eq 0 || $filed13G eq 1 || $paidFee eq 0 || $paidFee eq 1) {
        # NOTE(review): source is damaged from here.  The next statement
        # starts as a string append and ends as the tail of a pattern match;
        # whatever stood between - presumably the rest of this if-block and
        # the start of the per-cover loop that sets $cover and $newtext - is
        # missing from this copy.  Left exactly as found.
        $XMLout .= ")(.*)/si);

        if ($debug > 1) {
            print "----------------------------------------------\n";
            print $cover, "\n";
            print "----------------------------------------------\n\n\n\n";
        }

        # Getting Name of reporting person
        # The captured block is stripped of the boiler-plate caption words,
        # "ID" is turned into a newline, and the result is split on
        # newlines: a piece containing a digit becomes $irsId, every other
        # non-blank piece is appended to $filerName.
        # NOTE(review): s/ss//si and s/irs//si are case-insensitive and
        # unanchored, so they also remove those letters from inside a name
        # if that is where the first occurrence is - confirm intent.
        # NOTE(review): the "S\.S\. OR" pattern below is split by a physical
        # line break in this copy; kept as found.
        ($filerName) = ($cover =~ /NAMES* OF REPORTING PERSONS*(.*)2.*Check the\s*Appropriate/si);
        $filerName =~ s/IDENTIFICATION NOS*\. OF ABOVE PERSONS*//si;
        $filerName =~ s/S\.S\. 
OR//si;
        $filerName =~ s/ss//si;
        $filerName =~ s/I\.R\.S\.//si;
        $filerName =~ s/irs//si;
        $filerName =~ s/\(ENTITIES ONLY\)//si;
        $filerName =~ s/ID/\n/si;
        $filerName =~ s/-{3,}//;
        $filerName =~ s/\s+/ /;
        $filerName =~ s/Number//;
        @reqdata = split(/\n/, $filerName);
        $filerName = "";
        foreach $req(@reqdata) {
            if($req =~ /\w/) {
                if($req =~ /\d/) {
                    $irsId = $req;
                } else {
                    $filerName .= $req;
                    $filerName .= " ";
                }
            }
        }

        # Getting Check member of a group
        # $group ends up 'a' or 'b' when exactly one box tests as checked,
        # and '' when both or neither do.
        ($group) = ($cover =~ /CHECK THE APPROPRIATE.*MEMBER OF A GROUP(.*)3\.*\s*SEC USE/si);
        $group =~ s/-{3,}//gsi;
        $group =~ s/\(//gsi;
        $group =~ s/\)//gsi;
        $groupa = &EDGARdoc::util::isChecked("a", $group);
        ($groupb) = ($group =~ /(b.*)/si);
        $groupb = &EDGARdoc::util::isChecked("b", $groupb);
        if($groupa eq 1) { $group = 'a'; }
        if($groupb eq 1) { $group = 'b'; }
        if($groupa eq 0 && $groupb eq 0) { $group = ''; }
        if($groupa eq 1 && $groupb eq 1) { $group = ''; }

        # Getting SEC only
        ($secOnly) = ($cover =~ /SEC USE ONLY(.*)4\.*\s*SOURCE OF FUNDS/si);
        $secOnly =~ s/-{3,}//;

        # Getting Source of Funds
        ($fundSource) = ($cover =~ /SOURCE OF FUNDS:*\**(.*)5\.*\s*CHECK/si);
        $fundSource =~ s/-{3,}//;
        $fundSource =~ s/\n//;
        $fundSource =~ s/\s{2,}/ /;

        # Getting Check if disclosure of legal proceedings
        ($disclosure) = ($cover =~ /(5\.*\s*CHECK.*DISCLOSURE)6\.*\s*CITIZENSHIP/si);
        $disclosure =~ s/-{3,}//;
        $disclosure = EDGARdoc::util::isChecked("(e)", $disclosure);

        # Getting Citizenship
        ($citizenship) = ($cover =~ /CITIZENSHIP.*OF ORGANIZATION(.*)7\.*\s*SOLE VOTING POWER/si);
        $citizenship =~ s/-{3,}//;
        $citizenship =~ s/NUMBER OF//gsi;

        # Getting Number of shares beneficially owned
        # Rows 7-10 are captured as one block; the first occurrence of each
        # word of the side caption ("NUMBER OF SHARES BENEFICIALLY OWNED BY
        # EACH REPORTING PERSON WITH", including the hyphenated "BENE-" /
        # "FICIALLY" form) is removed (no /g), then the four figures are cut
        # out between the row captions.
        ($sharesOwned) = ($cover =~ /(SOLE VOTING POWER.*)11\.*\s*AGGREGATE AMOUNT BENEFICIALLY/si);
        $sharesOwned =~ s/NUMBER//si;
        $sharesOwned =~ s/OF//si;
        $sharesOwned =~ s/SHARES//si;
        $sharesOwned =~ s/BENEFICIALLY//si;
        $sharesOwned =~ s/BENE-//si;
        $sharesOwned =~ s/FICIALLY//si;
        $sharesOwned =~ s/OWNED//si;
        $sharesOwned =~ s/BY//si;
        $sharesOwned =~ s/EACH//si;
        $sharesOwned =~ s/REPORTING//si;
        $sharesOwned =~ s/PERSON//si;
        $sharesOwned =~ s/WITH//si;
        ($soleVote) = ($sharesOwned =~ /SOLE VOTING POWER(.*)8\.*\s*SHARED VOTING POWER/si);
        $soleVote =~ s/-{3,}//gsi;
        ($sharedVote) = ($sharesOwned =~ /SHARED VOTING POWER(.*)9\.*\s*SOLE DISPOSITIVE POWER/si);
        $sharedVote =~ s/-{3,}//gsi;
        ($soleDispositive) = ($sharesOwned =~ /SOLE DISPOSITIVE POWER(.*)10\.*\s*SHARED DISPOSITIVE POWER/si);
        $soleDispositive =~ s/-{3,}//gsi;
        ($sharedDispositive) = ($sharesOwned =~ /SHARED DISPOSITIVE POWER(.*)/si);
        $sharedDispositive =~ s/-{3,}//gsi;

        # Getting Aggregate amount beneficially owned
        ($aggregateAmount) = ($cover =~ /AGGREGATE AMOUNT.*PERSON(.*)12\.*\s*CHECK(.*)AGGREGATE/si);
        $aggregateAmount =~ s/-{3,}//gsi;

        # Getting Check box if the aggregate amount in row 11 excludes certain shares
        ($aggregate) = ($cover =~ /12\.*\s*(.*)13\.*\s*PERCENT OF CLASS/si);
        $aggregate =~ s/-{3,}//gsi;
        $aggregate = EDGARdoc::util::isChecked("SHARES*", $aggregate);

        # Getting Percent of class
        ($percentClass) = ($cover =~ /PERCENT OF CLASS.*ROW\s*\(*11\)*(.*)14\.*\s*TYPE/si);
        $percentClass =~ s/-{3,}//gsi;

        # Getting Person Type
        # Only the first two-letter code found is kept in $role.
        ($role) = ($cover =~ /TYPE OF REPORTING PERSON(.*)/s);
        $role =~ s/\*//gsi;
        $role =~ s/-{3,}//gsi;
        ($role) = ($role =~ /\s*(BD|BK|IC|IV|IA|EP|HC|SA|CP|CO|PN|IN|OO).*/si);

        if ($debug > 1) {
            print "------------------------COVER------------------------\n";
            print "Filer Name = $filerName\n";
            print "IRS ID = $irsId\n";
            print "Group = $groupa $groupb\n";
            print "SEC Only = $secOnly\n";
            print "Fund Source = $fundSource\n";
            print "Disclosure = $disclosure\n";
            print "Citizenship = $citizenship\n";
            print "Sole Vote = $soleVote\n";
            print "Shared Vote = $sharedVote\n";
            print "Sole Dispositive = $soleDispositive\n";
            print "Shared Dispositive = $sharedDispositive\n";
            print "Aggregate Amount = $aggregateAmount\n";
            print "Aggregate = $aggregate\n";
            print "Percent Class = $percentClass\n";
            print "Role = $role\n";
        }

        # Another cover page is assumed to follow when the remaining text
        # still contains a "1 NAME(S) OF REPORTING" caption.
        if($newtext =~ /1\s*NAMES* OF REPORTING.*?/si) {
            $coverdone = 0;
        } else {
            $coverdone = 1;
        }

        # XMLize the output
        $XMLout .= "" . EDGAR::XML::trim($cusip) . "\n";
        ($filerName) = ($filerName =~ /(\w.*)/);
        # Name is split on whitespace into at most the first three words;
        # with only two words the second is treated as the last name.
        ($firstName, $middleName, $lastName) = split(/\s+/, $filerName);
        if(length($lastName) eq 0) {
            $lastName = $middleName;
            $middleName = "";
        }
        if (($firstName =~ /\w+/) && ($lastName =~ /\w+/)) {
            $XMLout .= EDGARdoc::util::getNameXML("filer", $firstName, $middleName, $lastName);
            # NOTE(review): /\d*/ also matches the empty string, so this
            # condition is always true; the inner /\w+/ test is what
            # actually decides whether the id is written.
            if($irsId =~ /\d*/) {
                $irsId =~ s/://;
                $irsId =~ s/\s{2,}//;
                if ($irsId =~ /\w+/) {
                    $XMLout .= "" . EDGAR::XML::trim($irsId) . "\n";
                }
            }
            $XMLout .= "\n";
        }
        ($secOnly) = ($secOnly =~ /\s*(\w.*)/);
        if ($secOnly =~ /\w+/) {
            $XMLout .= "" . EDGAR::XML::trim($secOnly) . "\n";
        }
        ($fundSource) = ($fundSource =~ /.*(sc|bk|af|wc|pf|oo).*/i);
        if (length($fundSource)) {
            $XMLout .= "\n";
        }
        $citizenship =~ s/\n//gsi;
        $citizenship =~ s/\s{2,}//gsi;
        if ($citizenship =~ /\w+/) {
            $XMLout .= "" . EDGAR::XML::trim($citizenship) . "\n";
        }
        $soleVote =~ s/\n//gsi;
        $soleVote =~ s/\s{2,}//gsi;
        $sharedVote =~ s/\n//gsi;
        $sharedVote =~ s/\s{2,}//gsi;
        $soleDispositive =~ s/\n//gsi;
        $soleDispositive =~ s/\s{2,}//gsi;
        $sharedDispositive =~ s/\n//gsi;
        $sharedDispositive =~ s/\s{2,}//gsi;
        $soleVote = EDGARdoc::util::toNumber($soleVote);
        $XMLout .= "\n";
        $XMLout .= "\n";
        # NOTE(review): the closing brace below has no visible opening
        # brace, and $sharedVote, $soleDispositive, $sharedDispositive and
        # $aggregateAmount are never written out in this copy - more code
        # appears to be missing around here.  Left exactly as found.
        }
        $percentClass =~ s/\n//gsi;
        $percentClass =~ s/\s{2,}//gsi;
        $percentClass = EDGARdoc::util::toNumber($percentClass);
        if ($percentClass =~ /\w+/) {
            $XMLout .= "" . $percentClass . "\n";
        }
        $role =~ s/\n|:/ /gsi;
        $role =~ s/-{3,}//gsi;
        $role =~ s/_{3,}//gsi;
        @roles = split(/\s/, $role);
        $XMLout .= "\n";
        # $rodata receives the matched code for each whitespace-separated
        # piece of $role; in this copy nothing visible uses it afterwards
        # (the string appended inside the loop is just "\n").
        foreach $ro(@roles) {
            if($ro =~ /bd|bk|ic|iv|ia|ep|hc|sa|cp|co|pn|in|oo/si) {
                ($rodata) = ($ro =~ /\s*(bd|bk|ic|iv|ia|ep|hc|sa|cp|co|pn|in|oo)\s*/si);
                $XMLout .= "\n";
            }
        }
        $XMLout .= "\n";

        # $beneficiallyOwned is accumulated separately and written out
        # later, inside the items section.
        $beneficiallyOwned .= "";
        if (length($securityTitle)) {
            $beneficiallyOwned .= "" . EDGAR::XML::trim($securityTitle);
            $beneficiallyOwned .= "\n";
        }
        $beneficiallyOwned .= "";
        $XMLout .= "\n";
    }

    # Parsing items section
    ($item2a, $item2b, $item2c, $item2d, $item2e, $item2f, $item5b, $item5d, $item5e, $considerationSource, $purpose, $contracts) = &getItemsSC13D($newtext);

    $XMLout .= "\n";
    $XMLout .= "" . EDGAR::XML::trim($securityTitle) . "\n";
    $XMLout .= "\n";
    ($item2a) = ($item2a =~ /\s*(.*)/);
    # Unlike the cover-page name above, this split is done with a greedy
    # three-group pattern, so with more than three words the extra words
    # stay in the first group.
    ($firstname, $middlename, $lastname) = ($item2a =~ /(.*)\s(.*)\s(.*)/);
    if(! ($lastname =~ /\w+/)) {
        $lastname = $middlename;
        $middlename = "";
    }
    if (($lastname =~ /\w+/) && ($firstname =~ /\w+/)) {
        $XMLout .= &EDGARdoc::util::getNameXML("filer", $firstname, $middlename, $lastname);
        $XMLout .= "\n";
    }
    $XMLout .= "\n";
    ($street, $city, $state, $zip, $country) = &getAddressParts($item2b);
    # The string literals below that span two physical lines are reproduced
    # as found; each therefore contains a real line break followed by "\n".
    if ($street =~ /\w+/) {
        $XMLout .= "
\n";
        $XMLout .= EDGARdoc::util::getPostalXML($street, $city, $state, $zip, $country);
        $XMLout .= "
\n";
    }
    $XMLout .= "
\n";
    if ($item2d =~ /\w+/) {
        $XMLout .= "" . EDGAR::XML::trim($item2d) . "\n";
    }
    if ($item2e =~ /\w+/) {
        $XMLout .= "" . EDGAR::XML::trim($item2e) . "\n";
    }
    if ($item2f =~ /\w+/) {
        $XMLout .= "" . EDGAR::XML::trim($item2f) . "\n";
    }
    $XMLout .= "" . EDGAR::XML::trim($cusip) . "\n";
    $XMLout .= "" . EDGAR::XML::trim($item5d) . "\n";
    if ($considerationSource =~ /\w+/) {
        $XMLout .= "" . EDGAR::XML::trim($considerationSource);
        $XMLout .= "\n";
    }
    # NOTE(review): this block writes $purpose but is guarded by
    # $considerationSource, the same test as the block above.  Looks like a
    # copy/paste slip for "$purpose =~ /\w+/" - confirm before changing.
    if ($considerationSource =~ /\w+/) {
        $XMLout .= "" . EDGAR::XML::trim($purpose) . "\n";
    }
    $XMLout .= "\n";
    $XMLout .= "$beneficiallyOwned\n";
    if ($item5b =~ /\w+/) {
        $XMLout .= "" . EDGAR::XML::trim($item5b) . "\n";
    }
    if ($item5d =~ /\w+/) {
        $XMLout .= "" . EDGAR::XML::trim($item5d) . "\n";
    }
    # $check is "Y" when item 5(e) contains a stand-alone X, else "N".
    # The string that presumably carried $check is empty in this copy.
    $check = "N";
    if($item5e =~ /\bX\b/si) {
        $check = "Y";
    }
    $XMLout .= "\n";
    $XMLout .= "\n";
    if ($contracts =~ /\w+/) {
        $XMLout .= "" . EDGAR::XML::trim($contracts) . "\n";
    }
    $XMLout .= "
\n";
    $XMLout .= "
\n";

    # Parsing Footer Section
    # Looking for pattern Item 7
    # With an "Item 7" present, the footer is the text after the last
    # "Signature" that follows it, cut at the first "PAGE"; otherwise it is
    # everything after the first "Signature".
    if($text =~ /Item 7/) {
        ($footer) = ($text =~ /Item 7.*Signature(.*)/si);
        ($footer) = ($footer =~ /(.*?PAGE)/si);
    } else {
        ($footer) = ($text =~ /Signature(.*)/si);
    }

    $XMLout .= &getSCFooter($footer);
    # $filerName here is whatever the last cover page left in it.
    ($firstName, $middleName, $lastName) = split(/\s+/, $filerName);
    if(length($lastName) eq 0) {
        $lastName = $middleName;
        $middleName = "";
    }
    if (($firstName =~ /\w+/) && ($lastName =~ /\w+/)) {
        $XMLout .= EDGARdoc::util::getNameXML("filer", $firstName, $middleName, $lastName);
        $XMLout .= "\n";
    }
    $XMLout .= "
\n";
    $XMLout .= "
\n";

    if ($debug > 1) {
        print $XMLout, "\n";
    }

    $XMLout = &EDGAR::XML::encode($XMLout);
    # value of the last statement is the sub's return value
    $XMLout;
}

1; # keep require happy

=head1 NAME

edgar-forms.pl - EDGAR forms parsing routines

=head1 PACKAGE

EDGAR::Forms

=head1 SYNOPSIS

require edgar-forms.pl;

=head1 REQUIRES

Perl, version 5.001 or higher.

=head1 DESCRIPTION

parsed EDGAR forms support.

=over 3

=head1 METHODS

=head2 parse

=item *

calls the correct form parsing routine.

=item *

Parsed XML is returned.

=item example:

my ($xml) = &EDGAR::Forms::parse("144", $text);

=head2 getSectionsFor3_4_5

=item *

Pulls out the header, body, table 1, table 2, and footer sections for forms of type 3, 4, and 5.

=item *

Form sections for the following are returned: header, body, table 1, table 2, and footer.

=item example:

($header, $initialQuestions, $table1, $table2, $footer) = getSectionsFor3_4_5($initialText);

=head2 edgarParse3

=item *

SEC EDGAR Form type 3 parsing routine.

=item *

Parsed XML is returned.

=item example:

my ($xml) = &EDGAR::Forms::edgarParse3($text);

=head2 edgarParse4

=item *

SEC EDGAR Form type 4 parsing routine.

=item *

Parsed XML is returned.

=item example:

my ($xml) = &EDGAR::Forms::edgarParse4($text);

=head2 edgarParse5

=item *

SEC EDGAR Form type 5 parsing routine.

=item *

Parsed XML is returned.

=item example:

my ($xml) = &EDGAR::Forms::edgarParse5($text);

=head2 edgarParse144

=item *

SEC EDGAR Form type 144 parsing routine.

=item *

Parsed XML is returned.

=item example:

my ($xml) = &EDGAR::Forms::edgarParse144($text);

=head2 getAddressParts

=item *

parses out address parts.

=item *

Address info is returned: street, city, state, zip, country.

=item example:

($street, $city, $state, $zip, $country) = &getAddressParts($addrText);

=head2 edgarParseSC13G

=item *

SEC EDGAR Form type SC 13G parsing routine.

=item *

Parsed XML is returned.

=item example:

my ($xml) = &EDGAR::Forms::edgarParseSC13G($text);

=head2 edgarParseSC13D

=item *

SEC EDGAR Form type SC 13D parsing routine.

=item *

Parsed XML is returned.

=item example:

my ($xml) = &EDGAR::Forms::edgarParseSC13D($text);

=back

=head1 COPYRIGHT

Copyright 1999 Invisible Worlds.

=cut