#!/usr/bin/perl use Getopt::Std; # Routine to generate an index file from PDS labels by extracting keywords # from the labels as specified by the input. # # 15 Mar 1995, acr. # 21 Apr 1995, acr: shaking out the bugs # 17 May 1995, acr: made ".LBL" extension check case-insensitive # 19 Jun 1995, acr: fixed file name case bug in 'runlabels' # 13 Jul 1995, acr: REALLY made ".LBL" extension check case-insensitive; # fixed bug which kept negative widths from left-justifying # 13 Nov 1995, acr: fixed leftover bug with last keyword width being negative # 29 Apr 1996, acr: fixed bug that caused keywords inside objects to be ignored # 30 May 1996, acr: made %val array local to &process_label to fix bug in # which old values of keywords missing in subsequent labels # were repeated # 08 Aug 1996, acr: Added some comments, removed some debug flags # 04 Mar 1997, acr: Fixed processing of object numbering to restart numbering # in each label # 19 May 1997, acr: beefed-up label line processing so it wouldn't choke on # '=' not surrounded by space, among other things; tweaked # ^PATH handling a bit so that relative ("../" etc.) paths # and the trailing slash are clipped. # 02 Feb 1999, acr: Removed gratuitous blank written onto the end of the last # field in each line. # 06 Mar 2003, acr: Added FILE_SPECIFICATION_NAME handling # 01 Oct 2003, acr: Fixed bug that wouldn't allow negative widths (for left- # justifying fields) from the command line. # 06 Dec 2005, acr: Modified to handle namespace IDs (local data dictionary # support; modified default fields list to coincide with # current INDEX_TABLE requirements; added default value # handling; use the now-standard Getopts package # # Input options: # # -a Check all files for attached labels # -n Do not use default keywords # -r Descend recursively through subdirectories # -o Send output to # -f Read keywords from # -d Start at directory # -v Include "VOLUME_ID" in default list. # # # Define the list of valid special fields as an associative array for easy # searching: %SPECIAL = ('^PATH', 1, '^FILE', 1, '^LABEL', 1, '^OFFSET', 1, '^OBJECT', 1, '^FILE_SPECIFICATION_NAME', 1); # # Start by dealing with the command line arguments and collecting the various # flags, fields, and file names. Getopts collects the options and their # arguments, adjusting the ARGV array accordingly. # getopts('anro:f:d:v'); # # Formatting and keyword selection are taken care of through two arrays: # one associative, the other a straight array. The associative array is # indexed on keyword name (including path) and returns the number of bytes # for the output field width. The second contains a list of the keywords # to be indexed, in the order in which they appeared on the command line # and/or in the field list file (default fields come first). This is used # to output the fields in order. A third array is used to hold any literal # values provided for use when the indicated element is not present in a # label. # # # If the "-n" switch was used, then no default fields are defined. # Otherwise, start with the default field list: # if (!$opt_n) { if ($opt_v) # Include VOLUME_ID { $num_fields = 5; %size = ( '^FILE_SPECIFICATION_NAME', -50, 'PRODUCT_ID', -30, 'DATA_SET_ID', -40, 'VOLUME_ID', -11, 'PRODUCT_CREATION_TIME', 19 ); @name = ( '^FILE_SPECIFICATION_NAME', 'PRODUCT_ID', 'DATA_SET_ID', 'VOLUME_ID', 'PRODUCT_CREATION_TIME' ); @literal = ( "", "", "", "NULL_0001", ""); } else { $num_fields = 4; %size = ( '^FILE_SPECIFICATION_NAME', -50, 'PRODUCT_ID', -30, 'DATA_SET_ID', -40, 'PRODUCT_CREATION_TIME', 19 ); @name = ( '^FILE_SPECIFICATION_NAME', 'PRODUCT_ID', 'DATA_SET_ID', 'PRODUCT_CREATION_TIME' ); @literal = ( "", "", "", "" ); } } # # Look for an explicit output file and open it: # if ($opt_o) { open (OUTPUT,">$opt_o") || die "Could not open '$opt_o' for output.\n"; } # # Next, check to see if a file of field descriptions was included. If so, # read it: # if ($opt_f) { open (FIELDS,$opt_f) || die "Could not open '$opt_f'\n"; while ($line=) { chop $line; if ( $line =~/^\s*$/ || $line =~ /^\s*#.*$/ ) { next; } $line =~ s/#.*$//; # Delete trailing comments $line =~ s/\s//g; # Delete whitespace ($field,$width,$val) = split(/\//,$line); # Split into fields if ($width eq "") # Insert default width { $width = ($val eq "")? 10 : length($val); } if ($width !~ /^(-|)[0-9]+$/) # Not a number - assume lit. { $val = $width; $width = length($val); } $tmpsize[$num_fields] = $width; $name[$num_fields] = $field; $literal[$num_fields] = $val; $num_fields++; } close(FIELDS); } # # Finally, retrieve field names from the command line, if any: # for ($i=0; $i<=$#ARGV; $i++) # Get field descriptions { ($fname, $fwidth, $val) = split(/\//,$ARGV[$i]); if ($fwidth eq "") { $fwidth = ($val eq "")? 10 : length($val); } if ($fwidth !~ /^(-|)[0-9]+$/) { $val = $fwidth; $fwidth = length($val); } $tmpsize[$num_fields] = $fwidth; $name[$num_fields] = $fname; $literal[$num_fields] = $val; $num_fields++; } # # Format and do syntax-checking for all collected field names: # $num_fields = &checknames($num_fields,*name,*tmpsize); if ($num_fields < 1) { printf STDOUT "pdsidx: no valid fields entered.\n"; exit; } # # And store output field sizes in an associative array: # for ($i=0; $i<$num_fields; $i++) { if ($tmpsize[$i] != 0) # Special Fields may have no size in the array { $size{$name[$i]} = $tmpsize[$i]; } } ## *** print keyword list *** ##for ($i=0; $i<$num_fields; $i++) ## { printf "Keyword: %-50.50s Width: %2d\n", $name[$i], $size{$name[$i]}; ## } # # Determine the starting directory: # if ($opt_d) { if (-d $opt_d) # Make sure it's a directory { $start_dir = $opt_d; } else { printf STDOUT "pdsidx: Invalid root directory (%s).\n", $opt_d; exit; } } else { $start_dir = '.'; } # # Now call the recursive routine which will process this and all # subdirectories, if requested: # &runlabels($start_dir,$opt_r); #============================================================================= sub checknames { local ($n,*fldnm,*width) = @_; local ($name,$i,$j,$k,$count,@tmpname,@tmpsize); local ($elm,$ldd); # First, run through the names, do syntax-checking and put in # uniform format: foreach $name (@fldnm) { # Delete blanks: $name =~ s/ *//g; # Force to upper case: $name =~ tr/a-z/A-Z/; # First, check for special fields (beginning with '^'): if ($name =~ /^\^\w+/) { if (!$SPECIAL{$name}) # Is it on the Special Field list? { printf STDOUT "pdsidx: Invalid field name, \"%s\" (%s).\n", $name,"not a special field"; $name = ""; } next; # No more checks for special names } # If there's a namespace, split if off, validate it and save it: if ($name =~ /:/) { ($ldd,$elm) = split(/:/,$name); if ($ldd !~ /^[A-Z][A-Z_0-9]*$/) { printf STDOUT "pdsidx: Invalid namespace, \"%s\" (%s).\n", $ldd, "invalid character"; $name = ""; next; } } else { $elm = $name; undef $ldd; } # Make sure there are only valid characters in the element name, and # that it begins with a letter: if ($elm !~ /^[A-Z][A-Z_0-9\-\[\]\.]*[^\.]$/) { printf STDOUT "pdsidx: Invalid field name, \"%s\" (%s).\n", $name, "invalid character"; $name = ""; next; } # Insert "[1]" before unnumbered objects: $elm =~ s/([A-Z]+)\./\1\[1\]./g; $name = ($ldd)? "$ldd:$elm" : $elm; # Check for equal numbers of []: if (split(/\[/,$name) != split(/\]/,$name)) { printf STDOUT "pdsidx: Invalid field name, \"%s\" (%s).\n", $name, "unbalanced brackets"; $name = ""; next; } # If there are brackets, make sure there are only digits # between them: if ( ($name =~ /\[/) && ($name =~ /\[\d*[^0-9\]]\d*\]/) ) { printf STDOUT "pdsidx: Invalid field name, \"%s\" (%s).\n", $name, "invalid subscript"; $name = ""; next; } } # Now, step through the lists and remove null field names: $count = 0; for ($i=0; $i<$n; $i++) { if ($fldnm[$i] ne "") { $tmpname[$count] = $fldnm[$i]; $tmpsize[$count] = $width[$i]; $count++; } } @fldnm = @tmpname; @width = @tmpsize; $count; } #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ sub runlabels # # Subroutine to run the loop through label files and subdirectories as needed # # Arguments: $_[0] = directory (unopened) # $_[1] = $opt_r (recursion flag) # { local ($dir=$_[0], $rflag=$_[1]); local (@label_file); # file names ending with ".lbl" local (@subdir); # subdirectory names local ($lf,$sd); # element counters for filename lists local ($file,$i); # loop/subscript variables local ($full_name); # full file name # Open the directory: opendir(DIR,$dir) || die "pdsidx: Could not open \"$dir\".\n"; # Get a sorted list of files in the directory, and parse it into lists # of label file names and subdirectories: $lf = $sd = 0; foreach $file (sort readdir(DIR)) { if ($file =~ /^\./) { next; } else { $file = $dir . '/' . $file; } $tfile = $file; $tfile =~ tr/a-z/A-Z/; if ($tfile =~ /\.LBL$/) { $label_file[$lf] = $file; $lf++; } elsif ($opt_a && is_label($tfile)) { $label_file[$lf] = $file; $lf++; } elsif (-d $file && $rflag) { $subdir[$sd] = $file; $sd++; } } # Now process the label files: foreach $file (@label_file) { open (LABEL,$file) || die "pdsidx: Could not open \"$file\".\n"; &process_label($file); } # Close the current directory and descend through subdirectories: close(DIR); foreach $file (@subdir) { &runlabels($file,$rflag); } } #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ sub process_label # Routine to read through the label file and pull out the desired keywords. # # Arguments: $_[0] = the full name of the file (already opened on LABEL) # { local ($file=$_[0]); # Full name of the file on LABEL local ($line); # input buffer local ($buffer); # string buffer local ($found_first) = 0; # flag for first object pointer local (%val); # Array of found values local ($name); # Object names for initialization loop local ($i); # string subscript/index local ($prtval); # Holding place for value to be printed # Before we can begin processing object names, we must initialize the # %object_count array. For the very first label, this array will have # no elements. In subsequent labels, all existing elements should be # re-initialized to zero, since object numbering is only relative to # other objects in the same label: foreach $name (keys %object_count) { $object_count{$name} = 0; } # First, assign the special values for ^PATH, ^LABEL and # ^FILE_SPECIFICATION_NAME values: $buffer = $file; $buffer =~ s/\/[a-zA-Z0-9_.]*$//; # Remove file name $buffer =~ s/^[\.\/]+//; # Remove relative part of path name $val{'^PATH'} = $buffer; $buffer = $file; $buffer =~ s/^.*\///; $buffer =~ s/\.lbl//; $val{'^LABEL'} = $buffer; $val{'^FILE_SPECIFICATION_NAME'} = $val{'^PATH'} . "/" . $val{'^LABEL'} . ".lbl"; $object = ""; while ($line=&nextline) { # Do some format cleanup to ease processing later: $line =~ s/^\s+//; # Clip leading blanks $line =~ s/\s*=\s*/=/; # Remove blanks around first '=' # Split the line at the first '=': $i = index($line,"="); if ($i >= 0) { $keyword = substr($line,0,$i); $value = substr($line,$i+1); $value =~ s/^("|')//; $value =~ s/("|')$//; } else { $keyword = $line; undef ($value); } $keyword =~ tr/a-z/A-Z/; # Force keyword to upper case last if ($keyword eq "END"); # Check for end of label # Check for end of object and pop object name as needed. Note that # we're assuming the label is well-formatted: if ($keyword eq "END_OBJECT") { $object =~ s/[^.]*\.$//; next; } # Any remaining one-word lines should be ignored as well: next unless ($value); # # OK, now we've got the keyword from the line. Time to check for new # OBJECT definitions and adjust the $object variable accordingly; # if ($keyword eq "OBJECT") { $object = $object . $value; #printf " New object: $object\n"; # Increment the counter for this type of object, and add the # appropriate subscript: $object_count{$object}++; $object = sprintf("%s[%d].",$object,$object_count{$object}); next; } # Add the object path to the keyword value. If this string has a value # in the %size array, save its corresponding value in the %val array: $keyword = $object . $keyword; #printf " Checking $keyword\n"; if ($size{$keyword}) { $val{$keyword} = $value; } # Check for the special fields associated with the data file pointer: if ( $keyword =~/^\^/ && !$found_first) # Ignore all but first { $keyword =~ s/^\^//; # pointer field # Check to see if the value is enclosed in parentheses, If so, # this indicates a filename with an offset, either in records # or bytes (BYTES flag included): if ( $value =~ /[(]/ ) { $value =~ s/ *//g; # Collapse blanks $value =~ s/[()]//g; # Remove parentheses ($filename, $offset, $byte_flag) = split(/,/,$value); $filename =~ s/("|')//g; # Remove quotes } else { $filename = $value; } # Assign the Filename and offset values to their respective elements: if ($byte_flag) { $offset .= " " . $byte_flag; } $val{'^FILE'} = $filename; $val{'^OFFSET'} = $offset; $val{'^OBJECT'} = $keyword; $found_first = 1; } } # # End of label. Print the index line for this label. Values are printed # in the order in which they were listed in the @name array: # if ($opt_o) { $oldhandle = select(OUTPUT); } # In each case, first we write the format string, then substitute the # literal value, if needed, for missing values, then write the value: for ($i=0; $i<$num_fields-1; $i++) { $bytes = $size{$name[$i]}; if ($bytes < 0) { $format = sprintf ("%%%d.%ds ",$bytes,-$bytes); } else { $format = sprintf ("%%%d.%ds ",$bytes,$bytes); } $prtval = ($val{$name[$i]} eq "")? $literal[$i] : $val{$name[$i]}; printf $format,$prtval; } $bytes = $size{$name[$num_fields-1]}; if ($bytes < 0) { $format = sprintf ("%%%d.%ds",$bytes,-$bytes); } else { $format = sprintf ("%%%d.%ds",$bytes,$bytes); } $prtval = ($val{$name[$num_fields-1]} eq "")? $literal[$num_fields-1] : $val{$name[$num_fields-1]}; printf "$format",$prtval; printf "\n"; if ($opt_o) { select($oldhandle); } } #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ sub nextline # Routine to return the next complete line from the input label file open # on