This problem is driving me crazy! I've tried using several Perl modules, including XML::Parser and HTML::TagReader, but neither seems to do exactly what I want. Let me provide a brief synopsis of the application I'm developing. Basically, it's a simple program that allows the user to select a directory which may be filled with hundreds of arbitrarily formatted XML documents (though all documents within a given directory will be formatted the same way). Once they select a directory, I need to collect the tags used in its documents into one array, so that the user can associate each tag with one of the established column headers in our database. So basically one XML document will have tags:
<item_name>
<producer>
<director>
and our actual column headers are
Object Name
Production Lead
Direction Lead
for example. I hope I was clear enough but what I need is basically some method of achieving that first list in an array. Code of solution attempt featuring HTML::TagReader included below for good measure:
CODE
#!/usr/bin/perl -w --
# generated by wxGlade 0.6.3 on Mon Jul 06 10:25:07 2009
# To get wxPerl visit http://wxPerl.sourceforge.net/
use Wx 0.15 qw[:allclasses];
use strict;
package XMLLoaderFrame;
use Wx qw[:everything];
use HTML::TagReader;
use Wx::Event qw(EVT_BUTTON);
use base qw(Wx::Frame);
use strict;
# Constructor for the main application frame.
#
# Parameters (all optional except the invocant):
#   $self   - invocant; when called as XMLLoaderFrame->new() this is the class
#             name until SUPER::new replaces it with the constructed object
#   $parent - parent window (left undef when not supplied)
#   $id     - window id (defaults to -1)
#   $title  - frame title (defaults to "")
#   $pos    - position (defaults to wxDefaultPosition)
#   $size   - size (defaults to wxDefaultSize)
#   $style  - frame style (defaults to wxDEFAULT_FRAME_STYLE)
#   $name   - window name (defaults to "")
#
# Creates every child widget, stores each one in the object hash under a
# descriptive key, applies properties and layout, binds the "Select" button
# to OnSelectClick, and returns the frame object.
sub new {
my( $self, $parent, $id, $title, $pos, $size, $style, $name ) = @_;
# Fill in defaults for any argument the caller omitted.
$parent = undef unless defined $parent;
$id = -1 unless defined $id;
$title = "" unless defined $title;
$pos = wxDefaultPosition unless defined $pos;
$size = wxDefaultSize unless defined $size;
$name = "" unless defined $name;
# begin wxGlade: XMLLoaderFrame::new
$style = wxDEFAULT_FRAME_STYLE
unless defined $style;
$self = $self->SUPER::new( $parent, $id, $title, $pos, $size, $style, $name );
# Two panels: "treepanel" holds the directory browser, "tagpanel" holds the
# tag-to-column mapping controls.
$self->{tagpanel} = Wx::Panel->new($self, -1, wxDefaultPosition, wxDefaultSize, );
$self->{treepanel} = Wx::Panel->new($self, -1, wxDefaultPosition, wxDefaultSize, );
# Static boxes consumed later by the StaticBoxSizers built in __do_layout.
$self->{tagpanelsizer_staticbox} = Wx::StaticBox->new($self->{tagpanel}, -1, "Match Tags" );
$self->{treepanelsizer_staticbox} = Wx::StaticBox->new($self->{treepanel}, -1, "Start Here" );
$self->{helptext} = Wx::StaticText->new($self->{treepanel}, -1, "To begin using this tool, select a directory from the browser below that contains XML files from ONE vendor.", wxDefaultPosition, wxDefaultSize, );
# "use" is processed at compile time, so its position inside the sub does not
# delay loading of the module.
use Wx::Perl::DirTree;
$self->{dirtree} = Wx::Perl::DirTree->new($self->{treepanel}, [251, 300]);
# NOTE: no event handler is bound to pathtext within this constructor.
$self->{pathtext} = Wx::TextCtrl->new($self->{treepanel}, -1, "Enter path to directory...", wxDefaultPosition, wxDefaultSize, wxTE_RICH);
$self->{selectdir} = Wx::Button->new($self->{treepanel}, -1, "Select");
# Vendor is a free-text field; every following row is a label plus a combo
# box that starts with an empty choice list.
$self->{vendor_text} = Wx::StaticText->new($self->{tagpanel}, -1, "Vendor:", wxDefaultPosition, wxDefaultSize, );
$self->{vendor_field} = Wx::TextCtrl->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, );
$self->{title_text} = Wx::StaticText->new($self->{tagpanel}, -1, "Title:\n", wxDefaultPosition, wxDefaultSize, );
# wxCB_DROPDOWN appears twice in the style flags below; OR-ing the same flag
# twice yields the same value as using it once.
$self->{title_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
$self->{filename_text} = Wx::StaticText->new($self->{tagpanel}, -1, "File Name:", wxDefaultPosition, wxDefaultSize, );
$self->{filename_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
$self->{docdate_text} = Wx::StaticText->new($self->{tagpanel}, -1, "Doc. Date:", wxDefaultPosition, wxDefaultSize, );
$self->{docdate_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
$self->{filetype_text} = Wx::StaticText->new($self->{tagpanel}, -1, "File Type:", wxDefaultPosition, wxDefaultSize, );
$self->{filetype_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
$self->{filesize_text} = Wx::StaticText->new($self->{tagpanel}, -1, "File Size:", wxDefaultPosition, wxDefaultSize, );
$self->{filesize_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
$self->{description_text} = Wx::StaticText->new($self->{tagpanel}, -1, "Description: ", wxDefaultPosition, wxDefaultSize, );
$self->{description_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
$self->{keywords_text} = Wx::StaticText->new($self->{tagpanel}, -1, "Keywords:", wxDefaultPosition, wxDefaultSize, );
# Unlike the other combo boxes, this one is created without wxCB_READONLY.
# NOTE(review): presumably intentional so keywords can be typed - confirm.
$self->{keywords_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN);
$self->{tech_text} = Wx::StaticText->new($self->{tagpanel}, -1, "Tech:", wxDefaultPosition, wxDefaultSize, );
$self->{tech_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
$self->{geography_text} = Wx::StaticText->new($self->{tagpanel}, -1, "Geography:", wxDefaultPosition, wxDefaultSize, );
$self->{geography_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
$self->{segment_text} = Wx::StaticText->new($self->{tagpanel}, -1, "Segment:", wxDefaultPosition, wxDefaultSize, );
$self->{segment_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
$self->{objtype_text} = Wx::StaticText->new($self->{tagpanel}, -1, "Obj. Type:", wxDefaultPosition, wxDefaultSize, );
$self->{objtype_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
$self->{competitor_text} = Wx::StaticText->new($self->{tagpanel}, -1, "Competitor: ", wxDefaultPosition, wxDefaultSize, );
$self->{competitor_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
$self->{vertical_text} = Wx::StaticText->new($self->{tagpanel}, -1, "Vertical:", wxDefaultPosition, wxDefaultSize, );
$self->{vertical_drop} = Wx::ComboBox->new($self->{tagpanel}, -1, "", wxDefaultPosition, wxDefaultSize, [], wxCB_DROPDOWN|wxCB_DROPDOWN|wxCB_READONLY);
# NOTE: no event handler is bound to generatexls within this constructor.
$self->{generatexls} = Wx::Button->new($self->{tagpanel}, -1, "Generate XLS");
$self->__set_properties();
$self->__do_layout();
# end wxGlade
# Clicking "Select" scans the chosen directory and fills the combo boxes.
EVT_BUTTON($self, $self->{selectdir}, \&OnSelectClick);
return $self;
}
# Extract the element name from one piece of raw tag text as returned by
# HTML::TagReader (e.g. '<item_name id="3">' gives 'item_name').
# Returns nothing for closing tags, comments, processing instructions and
# declarations, since none of those start with '<' followed by a name.
sub _element_name_of {
    my ($rawtag) = @_;
    return $1 if $rawtag =~ m{\A<\s*([A-Za-z_][\w:.\-]*)};
    return;
}

# Event handler for the "Select" button.
#
# Reads every *.xml file (case-insensitive) in the directory currently
# selected in the directory tree, collects the distinct element names in
# order of first appearance, and replaces the choice list of every mapping
# combo box with that list. Each entry is shown as "<name>".
#
# Parameters:
#   $self  - the XMLLoaderFrame whose widgets are updated
#   $event - the button event (unused)
#
# If no directory is selected or it cannot be opened, an error dialog is
# shown and the handler returns without touching the combo boxes, instead of
# dying inside the GUI event loop. Files the tag reader cannot open are
# skipped with a warning.
sub OnSelectClick{
    my($self, $event) = @_;
    use File::Spec ();    # core module; gives portable path joining

    my $dir = $self->{dirtree}->GetSelectedPath();
    my $dirhandle;
    unless (defined $dir && length $dir && opendir($dirhandle, $dir)) {
        Wx::MessageBox(
            "Cannot open selected path. Make sure a path is selected!",
            "Cisco XML Data Loader",
            wxOK() | wxICON_ERROR(),
            $self,
        );
        return;
    }
    # Sorted so the tag order does not depend on the order readdir returns.
    my @xmlfiles = sort grep { /\.xml\z/i } readdir($dirhandle);
    closedir($dirhandle);

    my $showerr = 0;
    my @taglist;
    my %seen;
    for my $file (@xmlfiles) {
        my $path = File::Spec->catfile($dir, $file);
        my $xmlreader = eval { HTML::TagReader->new($path) };
        unless ($xmlreader) {
            warn "Skipping unreadable file '$path': $@";
            next;
        }
        # gettag hands back ONE tag per call (plus position information in
        # list context), so it has to be called until the file is exhausted.
        while (1) {
            my ($rawtag) = $xmlreader->gettag($showerr);
            last unless defined $rawtag && length $rawtag;
            my $name = _element_name_of($rawtag);
            next unless defined $name;
            push @taglist, "<$name>" unless $seen{$name}++;
        }
    }

    my @drop_keys = qw(
        title_drop    filename_drop    docdate_drop  filetype_drop
        filesize_drop description_drop keywords_drop tech_drop
        geography_drop segment_drop    objtype_drop  competitor_drop
        vertical_drop
    );
    for my $key (@drop_keys) {
        my $drop = $self->{$key};
        # Start from an empty list so repeated clicks do not pile up
        # duplicate entries from earlier scans.
        $drop->Clear();
        $drop->AppendString($_) for @taglist;
    }

    # Redraw related frame to correctly reflect changes.
    $self->{tagpanel}->Refresh();
    return;
}
# Apply the frame title and size, the minimum sizes of selected widgets, and
# the initial (empty) selection of every mapping combo box, then wrap the
# help text to 241 pixels.
sub __set_properties {
    my ($self) = @_;
    # begin wxGlade: XMLLoaderFrame::__set_properties
    $self->SetTitle("Cisco XML Data Loader");
    $self->SetSize(Wx::Size->new(650, 605));

    # [ widget key, minimum width, minimum height ]
    my @min_sizes = (
        [ helptext  => -1,  -1  ],
        [ pathtext  => 256, 18  ],
        [ treepanel => 251, 343 ],
    );
    for my $entry (@min_sizes) {
        my ($widget, $width, $height) = @{$entry};
        $self->{$widget}->SetMinSize(Wx::Size->new($width, $height));
    }

    # -1 means "nothing selected" for each combo box.
    my @fields = qw(
        title filename docdate filetype filesize description keywords
        tech geography segment objtype competitor vertical
    );
    for my $field (@fields) {
        $self->{"${field}_drop"}->SetSelection(-1);
    }

    $self->{tagpanel}->SetMinSize(Wx::Size->new(251, 343));
    # end wxGlade
    $self->{helptext}->Wrap(241);
}
# Build the sizer hierarchy and attach every widget to it.
#
# The left panel stacks help text, directory tree, path field and "Select"
# button with 20x20 spacers between them. The right panel holds a 14x3 grid
# (label, control, 10x10 spacer per row) followed by the "Generate XLS"
# button. Both panels sit side by side in a horizontal sizer inside the
# frame's vertical main sizer. All sizers are stored in the object hash.
sub __do_layout {
    my ($self) = @_;
    # begin wxGlade: XMLLoaderFrame::__do_layout
    $self->{mainsizer}      = Wx::BoxSizer->new(wxVERTICAL);
    $self->{panelsizer}     = Wx::BoxSizer->new(wxHORIZONTAL);
    $self->{tagpanelsizer}  = Wx::StaticBoxSizer->new($self->{tagpanelsizer_staticbox}, wxVERTICAL);
    $self->{tagsizer}       = Wx::GridSizer->new(14, 3, 0, 0);
    $self->{treepanelsizer} = Wx::StaticBoxSizer->new($self->{treepanelsizer_staticbox}, wxVERTICAL);
    $self->{treesizer}      = Wx::BoxSizer->new(wxVERTICAL);

    my $tree_column = $self->{treepanelsizer};
    my $tag_grid    = $self->{tagsizer};
    my $tag_column  = $self->{tagpanelsizer};
    my $columns     = $self->{panelsizer};

    # Left panel: spacer, help text, spacer, tree, spacer, path, spacer, button.
    my $add_gap = sub { $tree_column->Add(20, 20, 0, 0, 0) };
    $add_gap->();
    $tree_column->Add($self->{helptext}, 0, wxEXPAND, 0);
    $add_gap->();
    $self->{treesizer}->Add($self->{dirtree}->GetTree(), 1, wxEXPAND, 0);
    $tree_column->Add($self->{treesizer}, 1, wxEXPAND, 0);
    $add_gap->();
    $tree_column->Add($self->{pathtext}, 0, wxEXPAND, 0);
    $add_gap->();
    $tree_column->Add($self->{selectdir}, 0, 0, 0);
    $self->{treepanel}->SetSizer($tree_column);
    $columns->Add($self->{treepanel}, 1, wxRIGHT|wxEXPAND, 1);

    # Right panel grid: one [label, control] pair per row, vendor row first.
    my @rows = ( [ 'vendor_text', 'vendor_field' ] );
    for my $field (qw(
        title filename docdate filetype filesize description keywords
        tech geography segment objtype competitor vertical
    )) {
        push @rows, [ "${field}_text", "${field}_drop" ];
    }
    for my $row (@rows) {
        my ($label_key, $control_key) = @{$row};
        $tag_grid->Add($self->{$label_key}, 0, 0, 0);
        $tag_grid->Add($self->{$control_key}, 0, 0, 0);
        $tag_grid->Add(10, 10, 0, 0, 0);    # third column is a spacer
    }

    $tag_column->Add($tag_grid, 1, wxEXPAND, 0);
    $tag_column->Add($self->{generatexls}, 0, 0, 0);
    $self->{tagpanel}->SetSizer($tag_column);
    $columns->Add($self->{tagpanel}, 1, wxEXPAND, 0);

    # Frame-level assembly.
    $self->{mainsizer}->Add($columns, 1, wxEXPAND, 0);
    $self->SetSizer($self->{mainsizer});
    $self->{mainsizer}->SetSizeHints($self);
    $self->Layout();
    # end wxGlade
}
# end of class XMLLoaderFrame
1;
package XMLLoader;
use base qw(Wx::App);
use strict;
# Application start-up hook: initialises the image handlers, creates the
# main XMLLoaderFrame, registers it as the top window and shows it.
# Returns 1.
sub OnInit {
    my $self = shift;

    Wx::InitAllImageHandlers();

    my $main_window = XMLLoaderFrame->new();
    $self->SetTopWindow($main_window);
    $main_window->Show(1);

    return 1;
}
# end of class XMLLoader
package main;
# Start the application only when this file is executed directly; when it is
# loaded from other code (caller returns a true value) nothing is run.
if (!caller()) {
    my $app = XMLLoader->new();
    $app->MainLoop();
}
This post has been edited by Tracekill: 14 Jul, 2009 - 10:39 AM