#!/usr/bin/perl
# M.Duke
# UKOLN March 2003
# Version 0.1
#
# Modifies RDN XML records.
#
# Usage: trans.pl -c configfile -i inputfile -o outputfile
#
# Applies the regular expressions in configfile to the input file and
# writes the output to outputfile.
#
# Note: inputfile must be given with the full path, including the current
# directory, e.g. `./record.xml'.  If the -i switch is `-' the input is
# taken from standard input.
#
# The required transformations are read from a config file.  The config
# file is made up of "element_name regexp" pairs, one pair per line, e.g.
#
#     type s/xxx/yyyy/i
#
# Input files containing more than one record are handled; multiple input
# files are not.

use XML::Simple;
use Data::Dumper;
use Getopt::Std;

# NOTE(review): 'use strict' is intentionally not enabled here.  The script
# relies on the $opt_* package variables created by getopts() and on package
# variables shared between its sections; enabling strict has to be done for
# the whole file at once.

# Option values are filled in by getopts() as package variables.
our ($opt_c, $opt_i, $opt_o);

my $usage = "Usage: trans.pl -c configfile -i input file or - for standard input -o output file\n";

if (!getopts('c:i:o:')) {
    die($usage);
}

# getopts() only fails on unknown switches or missing switch arguments; it
# succeeds when a switch is simply absent, so check that all three are set.
foreach my $required ($opt_c, $opt_i, $opt_o) {
    die($usage) unless defined($required) && length($required);
}

print "input file is $opt_i config file is $opt_c output file is $opt_o\n";

my $xmlp;
if ($opt_i ne '-') {
    # An input file name has been given.
    $xmlp = XMLin($opt_i, forcearray => 1);
}
else {
    # The XML is on standard input: slurp it into one string.  STDIN is read
    # explicitly so that stray command-line arguments are not opened as
    # input files (which is what the magic <> handle would do).
    my $xmlin = do { local $/; <STDIN> };
    die("no XML received on standard input\n")
        unless defined($xmlin) && length($xmlin);
    $xmlp = XMLin($xmlin, forcearray => 1);    # give the string to XMLin
}

# Transform the parsed records.  The result is kept in the package variable
# $xmlt, which the output stage of the script reads.
# (This would be called once per file if multiple input files were ever
# supported -- how the output should be handled in that case is undecided.)
our $xmlt = doTransforms($xmlp);
# doTransforms($ref)
#
# Applies the transformations listed in the config file ($opt_c) to the
# records of one parsed input file.
#
#   $ref  - data structure returned by XMLin(..., forcearray => 1); the
#           records are expected under $ref->{record}.
#
# Each config line has the form "element_name regexp", separated by a single
# space; everything after the first space is the regexp, so it may itself
# contain spaces.  For every record, every value found at
# {metadata}[0]{dc}[0]{element_name} is modified in place.
#
# Returns $ref (the same structure, modified in place).
# Dies if the config file cannot be opened.
#
# SECURITY NOTE: the regexp text from the config file is executed with a
# string eval, so the config file can run arbitrary Perl code.  Only use
# config files from a trusted source.
sub doTransforms {
    my ($ref) = @_;

    # An array of references to records (empty if the input had none).
    my @records = @{ $ref->{record} || [] };

    open(my $conf, '<', $opt_c)
        or die "can't open config file $opt_c: $!\n";

    while (my $line = <$conf>) {
        # Strip the line ending only (also copes with CRLF files); a final
        # line without a newline keeps its last character.
        $line =~ s/\r?\n\z//;

        # Limit the split to two fields so spaces inside the regexp survive.
        my ($tag, $regexp) = split(/ /, $line, 2);
        next unless defined($tag)    && length($tag);
        next unless defined($regexp) && length($regexp);

        print "Tag is $tag regexp is $regexp\n";

        # Deal with the records one by one.
        foreach my $rec (@records) {
            # Do the substitutions only for records that contain the tag.
            my $values = _dc_values($rec, $tag) or next;

            foreach my $i (0 .. $#{$values}) {
                # Values that are references (presumably elements carrying
                # attributes or children) are left alone: a substitution
                # would act on the stringified reference and could replace
                # the whole element with a string like "HASH(0x...)".
                next if ref($values->[$i]);

                print "Apply $regexp to $tag $values->[$i]\n";

                # Apply the operation through a reference to the element, so
                # the tag name is never interpolated into the eval'd code
                # (tag names such as "dc:title" are not valid barewords).
                my $target = \$values->[$i];
                eval("\$\$target =~ $regexp;\n1;")
                    or warn "can't apply $regexp to $tag: $@";

                print "After applying regexp content of $tag is $values->[$i]\n";
            }
        }
    }
    close($conf);

    return $ref;
}

# _dc_values($rec, $tag)
#
# Returns the array reference holding the values of element $tag at
# $rec->{metadata}[0]{dc}[0], or nothing if any level is missing.  The
# structure is walked step by step so that a record without metadata/dc is
# not autovivified (which would add empty elements to the output).
sub _dc_values {
    my ($rec, $tag) = @_;

    my $node = $rec;
    foreach my $step ('metadata', 'dc') {
        return unless ref($node) eq 'HASH' && ref($node->{$step}) eq 'ARRAY';
        $node = $node->{$step}[0];
    }
    return unless ref($node) eq 'HASH' && ref($node->{$tag}) eq 'ARRAY';

    return $node->{$tag};
}

# trim($string)
#
# Returns a copy of $string with leading and trailing whitespace removed.
# Works on a private copy, so the caller's $_ is not disturbed.
# (Currently not called by doTransforms.)
sub trim {
    my ($text) = @_;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}

# Finally, write the substituted XML to the output file.
# NOTE(review): XMLout is called without a RootName option -- confirm the
# root element of the generated document is the one intended.
my $str = XMLout($xmlt);
open(my $outfile, '>', $opt_o)
    or die("can't open output file $opt_o: $!\n");
print {$outfile} "$str"
    or die("can't write to output file $opt_o: $!\n");
# Buffered write errors only surface at close, so check it as well.
close($outfile)
    or die("can't close output file $opt_o: $!\n");
#print Dumper $xmlt;