#!/skyview/bin/perl -w #****************************************************************************** =pod =head1 NAME votable_dump - Dump fields from a VOTABLE XML file =head1 SYNOPSIS votable_dump [--debug] [--help] [--outsep=s] [--verbose] [--version] [--warn] path [col1 ...] where: --debug - Produce debugging output. --help - Display a help message describing command-line options. --outsep=s - Use string 's' as the output field separator (default is tab). --verbose - Turn on verbose output. --version - Print the program version number. --warn - Print warning messages for recoverable errors. path - Path to VOTABLE file. coln - One or more FIELD names or numbers to dump, starting at 0. =head1 DESCRIPTION This program dumps the contents of the specified fields from each row of a VOTABLE file. =head1 WARNINGS The file must contain a single RESOURCE/TABLE/DATA/TABLEDATA set. The program aborts with a fatal error if an invalid FIELD is specified. If a FIELD has a number as a name, it is used rather than the corresponding FIELD of that number. This should _not_ happen - do not use numbers for the names of FIELD elements! =head1 VERSION $Id: votable_dump,v 1.1.1.6 2003/07/25 14:12:28 elwinter Exp $ =head1 AUTHOR Eric Winter (Eric.L.Winter.1@gsfc.nasa.gov) =cut #****************************************************************************** # Revision history # $Log: votable_dump,v $ # Revision 1.1.1.6 2003/07/25 14:12:28 elwinter # Added ability to specify FIELD elements by name as well as numbers. # # Revision 1.1.1.5 2003/07/24 18:43:29 elwinter # Swapped order of TR and TD element checks for better speed. # # Revision 1.1.1.4 2003/05/29 16:35:39 elwinter # Modified to use XML::Parser::PerlSAX, since the LibXML-based SAX module # has a serious memory leak. # # Revision 1.1.1.3 2003/05/29 12:34:38 elwinter # Updated to use SAX so that large documents may be processed. # # Revision 1.1.1.2 2003/05/29 11:41:18 elwinter # Converted program to standard format. # # Revision 1.1.1.1 2003/05/29 11:09:16 elwinter # Placeholder for new branch. # # Revision 1.1 2003/05/29 11:08:51 elwinter # Initial revision # #****************************************************************************** # Begin main module. package main; #****************************************************************************** # Compiler pragmas. # Turn on strict everything. use diagnostics; use strict; use warnings; #****************************************************************************** # Specify external modules to use. # Standard modules. use Data::Dumper; use Getopt::Long; use XML::Parser::PerlSAX; # Third-party modules. # Project modules. #****************************************************************************** # Constants # Program version use constant VERSION => '$Revision: 1.1.1.6 $ '; # Default output separator. my($DEFAULT_OUTPUT_SEPARATOR) = "\t"; #****************************************************************************** # Globals # Set this flag to generate debugging output. my($debug); # Set this flag if the user wants help. my($help); # Output field separator string. my($outsep) = $DEFAULT_OUTPUT_SEPARATOR; # Set this flag for verbose operation. my($verbose); # Set this flag to print the program version number. my($version); # Set this flag to print warning messages. my($warn); # List of option specifiers, and hash to hold mapping of command-line # options and arguments for GetOptions(). my(@optlist) = ( 'debug', 'help', 'outsep=s', 'verbose', 'version', 'warn', ); my(%optctl) = ( 'debug' => \$debug, 'help' => \$help, 'outsep' => \$outsep, 'verbose' => \$verbose, 'version' => \$version, 'warn' => \$warn, ); #****************************************************************************** # Subroutine declarations sub _GetHelp(); sub _GetVersion(); sub _ValidOptions(); sub _MainLoop(); #****************************************************************************** # Local utility subroutines # Debugging dump routine. sub _dump { print("(DEBUG) $_[0]\n"); print Dumper($_[1]); } #****************************************************************************** # Main block. # This block processes command-line options and invokes the main code. { # Process command-line arguments. GetOptions(\%optctl, @optlist); _dump('optctl', \%optctl) if $debug; _dump('ARGV', \@ARGV) if $debug; # Print help message if needed. if ($help) { print(_GetHelp()); exit(0); } # Print the program version number if needed. if ($version) { print(_GetVersion(), "\n"); exit(0); } # Validate the options. exit(1) if (not _ValidOptions()); # Beyond this point, the code may safely assume that all values # and flags derived from the command line are valid and internally # consistent. #-------------------------------------------------------------------------- # Enter the main program loop. die('Fatal error!') if (not _MainLoop()); #-------------------------------------------------------------------------- # Exit normally. exit(0); } #****************************************************************************** # _GetHelp() # This internal subroutine returns a string containing help text for # the program. sub _GetHelp() { return(<<_EOS_ votable_dump [--debug] [--help] [--outsep=s] [--verbose] [--version] [--warn] path [col1 ...] where: --debug - Produce debugging output. --help - Display a help message describing command-line options. --outsep=s - Use string 's' as the output field separator (default is tab). --verbose - Turn on verbose output. --version - Print the program version number. --warn - Print warning messages for recoverable errors. path - Path to VOTABLE file. coln - One or more FIELD names or numbers to dump, starting at 0. _EOS_ ); } #****************************************************************************** # _GetVersion() # This internal subroutine returns a string containing the program # version number. The version string is maintained by RCS. sub _GetVersion() { return((VERSION =~ /(\d+(\.\d+)+)/)[0]); } #****************************************************************************** # _ValidOptions() # This internal subroutine validates all of the options specified on # the command line. Return 1 if all options are valid, and 0 # otherwise. sub _ValidOptions() { # A VOTABLE file must be specified. if (@ARGV < 1) { warn('ERROR: A VOTABLE file must be specified!'); return(0); } # Return normally. return(1); } #****************************************************************************** sub _MainLoop() { # Local variables # Path to VOTABLE file to dump. my($path); # Array of column numbers to dump. my(@cols); # SAX handler for parsing. my($handler); # SAX parser. my($parser); #-------------------------------------------------------------------------- # Save the name of the VOTABLE file, and the column numbers. ($path, @cols) = @ARGV; _dump('path', $path) if $debug; _dump('cols', \@cols) if $debug; # Create the SAX event handler. $handler = VOTable_Dumper->new(@cols) or die('Unable to create SAX event handler!'); # Create the parser. $parser = XML::Parser::PerlSAX->new(Handler => $handler) or die('Unable to create parser!'); # Parse the file. $parser->parse(Source => {SystemId => $path}) or die("Error parsing $path!"); #-------------------------------------------------------------------------- # Return normally. return(1); } #****************************************************************************** #****************************************************************************** # Begin the SAX handler. package VOTable_Dumper; #****************************************************************************** # Compiler pragmas. # Turn on strict everything. use diagnostics; use strict; use warnings; #****************************************************************************** # Specify external modules to use. # Standard modules. # Third-party modules. # Project modules. #****************************************************************************** # Object setup #****************************************************************************** # Constants #****************************************************************************** # Globals # Array to accumulate field values for the current record. my(@fields); # Set this flag when the parser is within a TD element. my($in_TD); #****************************************************************************** sub new() { # Save arguments. my($class, @cols) = @_; # Create the object, storing the list of columns as an array. my($self) = bless { user_cols => [@cols], field_list => [], field_map => {}, cols => [], } => $class; # Return the new object. return($self); } #****************************************************************************** sub start_element() { # Save arguments. my($self, $element) = @_; # Local variables # Name or number for current FIELD. my($field_spec); #-------------------------------------------------------------------------- # Process the element if it's important. if ($element->{Name} eq 'TD') { # A new TD element is starting, so set the in_TD flag. $in_TD = 1; # Initialize the string for this field. push(@fields, ''); } elsif ($element->{Name} eq 'TR') { # A new TR element is starting, so clear the fields array. @fields = (); } elsif ($element->{Name} eq 'FIELD') { # Save the name of this FIELD and its position. $self->{field_map}{$element->{Attributes}{name}} = scalar(@{$self->{field_list}}); push(@{$self->{field_list}}, $element->{Attributes}{name}); } elsif ($element->{Name} eq 'DATA') { # Now that all of the FIELD elements have been read, map the # user specifications to column numbers. Check to see if the # FIELD specification is a FIELD name. If not, check if it a # FIELD number. If neither, abort with a fatal error. foreach $field_spec (@{$self->{user_cols}}) { if (exists($self->{field_map}{$field_spec})) { push(@{$self->{cols}}, $self->{field_map}{$field_spec}); } elsif ($field_spec =~ /^\d+$/ and exists($self->{field_list}[$field_spec])) { push(@{$self->{cols}}, $field_spec); } else { warn("Invalid FIELD specifier ($field_spec)!"); exit(1); } } } } #****************************************************************************** sub end_element() { # Save arguments. my($self, $element) = @_; #-------------------------------------------------------------------------- # Process the element if it's important. if ($element->{Name} eq 'TR') { # This row is done, so print the requested fields. print(join($outsep, @fields[@{$self->{cols}}]), "\n"); } elsif ($element->{Name} eq 'TD') { # Clear the in_TD flag. $in_TD = 0; } undef $element; } #****************************************************************************** sub characters() { # Save arguments. my($self, $characters) = @_; #-------------------------------------------------------------------------- # If within a TD element, save the text. $fields[-1] .= $characters->{Data} if $in_TD; }