Sometimes you need to parse a very simple XML file (config for your own project). In cases like that you may like to use something quick an dirty, just to get the content.
Below is a VERY PRIMITIVE XML parser.
Your config might look like the below
<unit name="sometest">
<package>
TestHarness
</package>
<targets>
<hosts location="DC1">
myhostdc11.mynetwork.net, myhostdc12.mynetwork.net
</hosts>
<hosts location="DC2">
myhostdc21.mynetwork.net, myhostdc22.mynetwork.net
</hosts>
</targets>
<svn>
http://subversion.ny.mynetwork.net/svn/repos/MYPROJECT
</svn>
<deployment>
TAG_NUMBER=$1
tar -zxvf TestHarness_${TAG_NUMBER}.tar.gz
ln -sfn TestHarness_${TAG_NUMBER} testharness
cd ./testharness
chmod -R 777 *
cd ./scripts
dos2unix *
chmod 777 *
cd ../data
dos2unix *
chmod 777 *
</deployment>
</unit>
|
The Perl script to parse this will look like below (NOTE I had to change formatting a little to fit this page).
#!/usr/bin/perl
{
$num_args = $#ARGV + 1;
if ($num_args != 4) {
print "\nUsage: " .
"configurator.pl configfile_name project_name " .
"datacenter [script|hosts|vars]\n";
exit;
}
$conf_file=$ARGV[0];
$project_name=$ARGV[1];
$datacenter=$ARGV[2];
$sectionout=$ARGV[3];
my @project_config;
my $onestring = "";
open (CONFFILE, $conf_file) or die("Unable to open file");
$inproject=0;
while (<CONFFILE>) {
chomp;
push(@file_data,$_);
}
my @project_config = &getcontentforattr(\@file_data, "unit",
"name", $project_name );
my (@content);
if($sectionout eq "script"){
my @content = &getcontent(\@project_config, "deployment");
foreach $temp (@content){
print $temp . "\n";
}
}
if($sectionout eq "vars"){
my $svnlocation =&getsvnlocation(\@project_config);
print "SVNLOCATION=" . ltrim($svnlocation) . "\n";
my $packagename = &getpackagename(\@project_config);
print "PACKAGENAME=" . $packagename . "\n";
}
if($sectionout eq "hosts"){
my @projhosts = &gethosts(\@project_config, $datacenter);
foreach $host (@projhosts){
print $host . "\n";
}
}
close (CONFFILE);
}
####################
sub getdeploymentscript
{
my(@proj_conf) = @{$_[0]};
my @output = &getcontent(\@proj_conf, "deployment");
return (@output);
}
#######################
sub getsvnlocation
{
my(@proj_conf) = @{$_[0]};
my $strout = "";
my @output = &gettrimcontent(\@proj_conf, "svn");
foreach $temp (@output){
$strout = $strout . $temp;
}
return ($strout);
}
##########################
sub getpackagename
{
my(@proj_conf) = @{$_[0]};
my $strout = "";
my @output = &gettrimcontent(\@proj_conf, "package");
foreach $temp (@output){
$strout = $strout . $temp;
}
return ($strout);
}
#############################################
sub gethosts
{
my(@proj_conf) = @{$_[0]};
my($dc) = $_[1];
my @section_targets = &getcontent(\@proj_conf, "targets");
my @section_hosts = &getcontentforattr(\@section_targets,
"hosts",
"location", $dc );
my (@hostsout);
foreach $hostlongline (@section_hosts){
my @hoststmp = split(/,/,$hostlongline);
foreach $host ( @hoststmp){
if(length($host) > 0){
push(@hostsout,trim($host));
}
}
}
return (@hostsout);
}
#############################################
# parser functions
#############################################
sub gettrimcontent
{
my (@section_in) = @{$_[0]};
my $tag = $_[1];
my (@content_out);
my @content = &getcontent(\@section_in, $tag);
foreach $temp (@content){
if( length($temp) > 0 ){
push(@content_out, trim($temp));
}
}
return (@content_out);
}
############################################
sub getcontent
{
my (@section_in) = @{$_[0]};
my $tag = $_[1];
my (@content);
my $insection = 0;
my $tagline = 0;
foreach $sectionline (@section_in){
$tagline = 0;
if ( $insection == 0 && $sectionline =~ /<$tag *.*>/ ){
$insection=1;
$tagline = 1;
$sectionline = substr($sectionline,($+[0])+1);
}
if ( $insection == 1 && $sectionline =~ /<\/$tag *>/ ){
$insection=0;
$tagline = 1;
$sectionline=substr($sectionline,0,($-[0]));
if(length(trim($sectionline))>0 ) {
push(@content, $sectionline);
}
}
if( $insection == 1 ){
if ($tagline == 1 && length(trim($sectionline)) > 0){
push(@content,$sectionline);
}
elsif ($tagline == 0){
push(@content,$sectionline);
}
}
}
return (@content);
}
#############################################
sub getcontentforattr
{
my (@section_in) = @{$_[0]};
my $tag = $_[1];
my $attr = $_[2];
my $attrvalue = $_[3];
my (@content);
my $insection = 0;
my $tagline = 0;
foreach $sectionline (@section_in){
$tagline = 0;
if (
$insection == 0 &&
$sectionline =~ /<$tag .*$attr=\"$attrvalue\".*>/
){
$insection=1;
$tagline = 1;
$sectionline = substr($sectionline,($+[0])+1);
}
if ( $insection == 1 && $sectionline =~ /<\/$tag *>/ ){
$insection=0;
$tagline = 1;
$sectionline=substr($sectionline,0,($-[0]));
if(length(trim($sectionline))>0 ) {
push(@content, $sectionline);
}
}
if( $insection == 1 ){
if ($tagline == 1 && length(trim($sectionline)) > 0){
push(@content,$sectionline);
}
elsif ($tagline == 0){
push(@content,$sectionline);
}
}
}
return (@content);
}
#############################################
##### Service functions
#############################################
sub trim($)
{
my $string = shift;
$string =~ s/^\s+//;
$string =~ s/\s+$//;
return $string;
}
# Left trim function to remove leading whitespace
sub ltrim($)
{
my $string = shift;
$string =~ s/^\s+//;
return $string;
}
# Right trim function to remove trailing whitespace
sub rtrim($)
{
my $string = shift;
$string =~ s/\s+$//;
return $string;
}
|