12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284 |
- # $Id: Simple.pm,v 1.40 2007/08/15 10:36:48 grantm Exp $
- package XML::Simple;
- =head1 NAME
- XML::Simple - Easy API to maintain XML (esp config files)
- =head1 SYNOPSIS
- use XML::Simple;
- my $ref = XMLin([<xml file or string>] [, <options>]);
- my $xml = XMLout($hashref [, <options>]);
- Or the object oriented way:
- require XML::Simple;
- my $xs = XML::Simple->new(options);
- my $ref = $xs->XMLin([<xml file or string>] [, <options>]);
- my $xml = $xs->XMLout($hashref [, <options>]);
- (or see L<"SAX SUPPORT"> for 'the SAX way').
- To catch common errors:
- use XML::Simple qw(:strict);
- (see L<"STRICT MODE"> for more details).
- =cut
- # See after __END__ for more POD documentation
- # Load essentials here, other modules loaded on demand later
- use strict;
- use Carp;
- require Exporter;
- ##############################################################################
- # Define some constants
- #
- use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER);
- @ISA = qw(Exporter);
- @EXPORT = qw(XMLin XMLout);
- @EXPORT_OK = qw(xml_in xml_out);
- $VERSION = '2.18';
- $PREFERRED_PARSER = undef;
- my $StrictMode = 0;
- my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr
- searchpath forcearray cache suppressempty parseropts
- grouptags nsexpand datahandler varattr variables
- normalisespace normalizespace valueattr);
- my @KnownOptOut = qw(keyattr keeproot contentkey noattr
- rootname xmldecl outputfile noescape suppressempty
- grouptags nsexpand handler noindent attrindent nosort
- valueattr numericescape);
- my @DefKeyAttr = qw(name key id);
- my $DefRootName = qq(opt);
- my $DefContentKey = qq(content);
- my $DefXmlDecl = qq(<?xml version='1.0' standalone='yes'?>);
- my $xmlns_ns = 'http://www.w3.org/2000/xmlns/';
- my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround
- ##############################################################################
- # Globals for use by caching routines
- #
- my %MemShareCache = ();
- my %MemCopyCache = ();
- ##############################################################################
- # Wrapper for Exporter - handles ':strict'
- #
- sub import {
- # Handle the :strict tag
-
- $StrictMode = 1 if grep(/^:strict$/, @_);
- # Pass everything else to Exporter.pm
- @_ = grep(!/^:strict$/, @_);
- goto &Exporter::import;
- }
- ##############################################################################
- # Constructor for optional object interface.
- #
- sub new {
- my $class = shift;
- if(@_ % 2) {
- croak "Default options must be name=>value pairs (odd number supplied)";
- }
- my %known_opt;
- @known_opt{@KnownOptIn, @KnownOptOut} = (undef) x 100;
- my %raw_opt = @_;
- my %def_opt;
- while(my($key, $val) = each %raw_opt) {
- my $lkey = lc($key);
- $lkey =~ s/_//g;
- croak "Unrecognised option: $key" unless(exists($known_opt{$lkey}));
- $def_opt{$lkey} = $val;
- }
- my $self = { def_opt => \%def_opt };
- return(bless($self, $class));
- }
- ##############################################################################
- # Sub: _get_object()
- #
- # Helper routine called from XMLin() and XMLout() to create an object if none
- # was provided. Note, this routine does mess with the caller's @_ array.
- #
- sub _get_object {
- my $self;
- if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) {
- $self = shift;
- }
- else {
- $self = XML::Simple->new();
- }
-
- return $self;
- }
- ##############################################################################
- # Sub/Method: XMLin()
- #
- # Exported routine for slurping XML into a hashref - see pod for info.
- #
- # May be called as object method or as a plain function.
- #
- # Expects one arg for the source XML, optionally followed by a number of
- # name => value option pairs.
- #
- sub XMLin {
- my $self = &_get_object; # note, @_ is passed implicitly
- my $target = shift;
- # Work out whether to parse a string, a file or a filehandle
- if(not defined $target) {
- return $self->parse_file(undef, @_);
- }
- elsif($target eq '-') {
- local($/) = undef;
- $target = <STDIN>;
- return $self->parse_string(\$target, @_);
- }
- elsif(my $type = ref($target)) {
- if($type eq 'SCALAR') {
- return $self->parse_string($target, @_);
- }
- else {
- return $self->parse_fh($target, @_);
- }
- }
- elsif($target =~ m{<.*?>}s) {
- return $self->parse_string(\$target, @_);
- }
- else {
- return $self->parse_file($target, @_);
- }
- }
- ##############################################################################
- # Sub/Method: parse_file()
- #
- # Same as XMLin, but only parses from a named file.
- #
- sub parse_file {
- my $self = &_get_object; # note, @_ is passed implicitly
- my $filename = shift;
- $self->handle_options('in', @_);
- $filename = $self->default_config_file if not defined $filename;
- $filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}});
- # Check cache for previous parse
- if($self->{opt}->{cache}) {
- foreach my $scheme (@{$self->{opt}->{cache}}) {
- my $method = 'cache_read_' . $scheme;
- my $opt = $self->$method($filename);
- return($opt) if($opt);
- }
- }
- my $ref = $self->build_simple_tree($filename, undef);
- if($self->{opt}->{cache}) {
- my $method = 'cache_write_' . $self->{opt}->{cache}->[0];
- $self->$method($ref, $filename);
- }
- return $ref;
- }
- ##############################################################################
- # Sub/Method: parse_fh()
- #
- # Same as XMLin, but only parses from a filehandle.
- #
- sub parse_fh {
- my $self = &_get_object; # note, @_ is passed implicitly
- my $fh = shift;
- croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') .
- " as a filehandle" unless ref $fh;
- $self->handle_options('in', @_);
- return $self->build_simple_tree(undef, $fh);
- }
- ##############################################################################
- # Sub/Method: parse_string()
- #
- # Same as XMLin, but only parses from a string or a reference to a string.
- #
- sub parse_string {
- my $self = &_get_object; # note, @_ is passed implicitly
- my $string = shift;
- $self->handle_options('in', @_);
- return $self->build_simple_tree(undef, ref $string ? $string : \$string);
- }
- ##############################################################################
- # Method: default_config_file()
- #
- # Returns the name of the XML file to parse if no filename (or XML string)
- # was provided.
- #
- sub default_config_file {
- my $self = shift;
- require File::Basename;
- my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+');
- # Add script directory to searchpath
-
- if($script_dir) {
- unshift(@{$self->{opt}->{searchpath}}, $script_dir);
- }
- return $basename . '.xml';
- }
- ##############################################################################
- # Method: build_simple_tree()
- #
- # Builds a 'tree' data structure as provided by XML::Parser and then
- # 'simplifies' it as specified by the various options in effect.
- #
- sub build_simple_tree {
- my $self = shift;
- my $tree = $self->build_tree(@_);
- return $self->{opt}->{keeproot}
- ? $self->collapse({}, @$tree)
- : $self->collapse(@{$tree->[1]});
- }
- ##############################################################################
- # Method: build_tree()
- #
- # This routine will be called if there is no suitable pre-parsed tree in a
- # cache. It parses the XML and returns an XML::Parser 'Tree' style data
- # structure (summarised in the comments for the collapse() routine below).
- #
- # XML::Simple requires the services of another module that knows how to parse
- # XML. If XML::SAX is installed, the default SAX parser will be used,
- # otherwise XML::Parser will be used.
- #
- # This routine expects to be passed a filename as argument 1 or a 'string' as
- # argument 2. The 'string' might be a string of XML (passed by reference to
- # save memory) or it might be a reference to an IO::Handle. (This
- # non-intuitive mess results in part from the way XML::Parser works but that's
- # really no excuse).
- #
- sub build_tree {
- my $self = shift;
- my $filename = shift;
- my $string = shift;
- my $preferred_parser = $PREFERRED_PARSER;
- unless(defined($preferred_parser)) {
- $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || '';
- }
- if($preferred_parser eq 'XML::Parser') {
- return($self->build_tree_xml_parser($filename, $string));
- }
- eval { require XML::SAX; }; # We didn't need it until now
- if($@) { # No XML::SAX - fall back to XML::Parser
- if($preferred_parser) { # unless a SAX parser was expressly requested
- croak "XMLin() could not load XML::SAX";
- }
- return($self->build_tree_xml_parser($filename, $string));
- }
- #$XML::SAX::ParserPackage = $preferred_parser if($preferred_parser);
- my $sp = XML::SAX::ParserFactory->parser(Handler => $self);
-
- $self->{nocollapse} = 1;
- my($tree);
- if($filename) {
- $tree = $sp->parse_uri($filename);
- }
- else {
- if(ref($string) && ref($string) ne 'SCALAR') {
- $tree = $sp->parse_file($string);
- }
- else {
- $tree = $sp->parse_string($$string);
- }
- }
- return($tree);
- }
- ##############################################################################
- # Method: build_tree_xml_parser()
- #
- # This routine will be called if XML::SAX is not installed, or if XML::Parser
- # was specifically requested. It takes the same arguments as build_tree() and
- # returns the same data structure (XML::Parser 'Tree' style).
- #
- sub build_tree_xml_parser {
- my $self = shift;
- my $filename = shift;
- my $string = shift;
- eval {
- local($^W) = 0; # Suppress warning from Expat.pm re File::Spec::load()
- require XML::Parser; # We didn't need it until now
- };
- if($@) {
- croak "XMLin() requires either XML::SAX or XML::Parser";
- }
- if($self->{opt}->{nsexpand}) {
- carp "'nsexpand' option requires XML::SAX";
- }
- my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}});
- my($tree);
- if($filename) {
- # $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl
- local(*XML_FILE);
- open(XML_FILE, '<', $filename) || croak qq($filename - $!);
- $tree = $xp->parse(*XML_FILE);
- close(XML_FILE);
- }
- else {
- $tree = $xp->parse($$string);
- }
- return($tree);
- }
- ##############################################################################
- # Method: cache_write_storable()
- #
- # Wrapper routine for invoking Storable::nstore() to cache a parsed data
- # structure.
- #
- sub cache_write_storable {
- my($self, $data, $filename) = @_;
- my $cachefile = $self->storable_filename($filename);
- require Storable; # We didn't need it until now
- if ('VMS' eq $^O) {
- Storable::nstore($data, $cachefile);
- }
- else {
- # If the following line fails for you, your Storable.pm is old - upgrade
- Storable::lock_nstore($data, $cachefile);
- }
-
- }
- ##############################################################################
- # Method: cache_read_storable()
- #
- # Wrapper routine for invoking Storable::retrieve() to read a cached parsed
- # data structure. Only returns cached data if the cache file exists and is
- # newer than the source XML file.
- #
- sub cache_read_storable {
- my($self, $filename) = @_;
-
- my $cachefile = $self->storable_filename($filename);
- return unless(-r $cachefile);
- return unless((stat($cachefile))[9] > (stat($filename))[9]);
- require Storable; # We didn't need it until now
-
- if ('VMS' eq $^O) {
- return(Storable::retrieve($cachefile));
- }
- else {
- return(Storable::lock_retrieve($cachefile));
- }
-
- }
- ##############################################################################
- # Method: storable_filename()
- #
- # Translates the supplied source XML filename into a filename for the storable
- # cached data. A '.stor' suffix is added after stripping an optional '.xml'
- # suffix.
- #
- sub storable_filename {
- my($self, $cachefile) = @_;
- $cachefile =~ s{(\.xml)?$}{.stor};
- return $cachefile;
- }
- ##############################################################################
- # Method: cache_write_memshare()
- #
- # Takes the supplied data structure reference and stores it away in a global
- # hash structure.
- #
- sub cache_write_memshare {
- my($self, $data, $filename) = @_;
- $MemShareCache{$filename} = [time(), $data];
- }
- ##############################################################################
- # Method: cache_read_memshare()
- #
- # Takes a filename and looks in a global hash for a cached parsed version.
- #
- sub cache_read_memshare {
- my($self, $filename) = @_;
-
- return unless($MemShareCache{$filename});
- return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]);
- return($MemShareCache{$filename}->[1]);
-
- }
- ##############################################################################
- # Method: cache_write_memcopy()
- #
- # Takes the supplied data structure and stores a copy of it in a global hash
- # structure.
- #
- sub cache_write_memcopy {
- my($self, $data, $filename) = @_;
- require Storable; # We didn't need it until now
-
- $MemCopyCache{$filename} = [time(), Storable::dclone($data)];
- }
- ##############################################################################
- # Method: cache_read_memcopy()
- #
- # Takes a filename and looks in a global hash for a cached parsed version.
- # Returns a reference to a copy of that data structure.
- #
- sub cache_read_memcopy {
- my($self, $filename) = @_;
-
- return unless($MemCopyCache{$filename});
- return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]);
- return(Storable::dclone($MemCopyCache{$filename}->[1]));
-
- }
- ##############################################################################
- # Sub/Method: XMLout()
- #
- # Exported routine for 'unslurping' a data structure out to XML.
- #
- # Expects a reference to a data structure and an optional list of option
- # name => value pairs.
- #
- sub XMLout {
- my $self = &_get_object; # note, @_ is passed implicitly
- croak "XMLout() requires at least one argument" unless(@_);
- my $ref = shift;
- $self->handle_options('out', @_);
- # If namespace expansion is set, XML::NamespaceSupport is required
- if($self->{opt}->{nsexpand}) {
- require XML::NamespaceSupport;
- $self->{nsup} = XML::NamespaceSupport->new();
- $self->{ns_prefix} = 'aaa';
- }
- # Wrap top level arrayref in a hash
- if(UNIVERSAL::isa($ref, 'ARRAY')) {
- $ref = { anon => $ref };
- }
- # Extract rootname from top level hash if keeproot enabled
- if($self->{opt}->{keeproot}) {
- my(@keys) = keys(%$ref);
- if(@keys == 1) {
- $ref = $ref->{$keys[0]};
- $self->{opt}->{rootname} = $keys[0];
- }
- }
-
- # Ensure there are no top level attributes if we're not adding root elements
- elsif($self->{opt}->{rootname} eq '') {
- if(UNIVERSAL::isa($ref, 'HASH')) {
- my $refsave = $ref;
- $ref = {};
- foreach (keys(%$refsave)) {
- if(ref($refsave->{$_})) {
- $ref->{$_} = $refsave->{$_};
- }
- else {
- $ref->{$_} = [ $refsave->{$_} ];
- }
- }
- }
- }
- # Encode the hashref and write to file if necessary
- $self->{_ancestors} = [];
- my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, '');
- delete $self->{_ancestors};
- if($self->{opt}->{xmldecl}) {
- $xml = $self->{opt}->{xmldecl} . "\n" . $xml;
- }
- if($self->{opt}->{outputfile}) {
- if(ref($self->{opt}->{outputfile})) {
- my $fh = $self->{opt}->{outputfile};
- if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) {
- eval { require IO::Handle; };
- croak $@ if $@;
- }
- return($fh->print($xml));
- }
- else {
- local(*OUT);
- open(OUT, '>', "$self->{opt}->{outputfile}") ||
- croak "open($self->{opt}->{outputfile}): $!";
- binmode(OUT, ':utf8') if($] >= 5.008);
- print OUT $xml || croak "print: $!";
- close(OUT);
- }
- }
- elsif($self->{opt}->{handler}) {
- require XML::SAX;
- my $sp = XML::SAX::ParserFactory->parser(
- Handler => $self->{opt}->{handler}
- );
- return($sp->parse_string($xml));
- }
- else {
- return($xml);
- }
- }
- ##############################################################################
- # Method: handle_options()
- #
- # Helper routine for both XMLin() and XMLout(). Both routines handle their
- # first argument and assume all other args are options handled by this routine.
- # Saves a hash of options in $self->{opt}.
- #
- # If default options were passed to the constructor, they will be retrieved
- # here and merged with options supplied to the method call.
- #
- # First argument should be the string 'in' or the string 'out'.
- #
- # Remaining arguments should be name=>value pairs. Sets up default values
- # for options not supplied. Unrecognised options are a fatal error.
- #
- sub handle_options {
- my $self = shift;
- my $dirn = shift;
- # Determine valid options based on context
- my %known_opt;
- if($dirn eq 'in') {
- @known_opt{@KnownOptIn} = @KnownOptIn;
- }
- else {
- @known_opt{@KnownOptOut} = @KnownOptOut;
- }
- # Store supplied options in hashref and weed out invalid ones
- if(@_ % 2) {
- croak "Options must be name=>value pairs (odd number supplied)";
- }
- my %raw_opt = @_;
- my $opt = {};
- $self->{opt} = $opt;
- while(my($key, $val) = each %raw_opt) {
- my $lkey = lc($key);
- $lkey =~ s/_//g;
- croak "Unrecognised option: $key" unless($known_opt{$lkey});
- $opt->{$lkey} = $val;
- }
- # Merge in options passed to constructor
- foreach (keys(%known_opt)) {
- unless(exists($opt->{$_})) {
- if(exists($self->{def_opt}->{$_})) {
- $opt->{$_} = $self->{def_opt}->{$_};
- }
- }
- }
- # Set sensible defaults if not supplied
-
- if(exists($opt->{rootname})) {
- unless(defined($opt->{rootname})) {
- $opt->{rootname} = '';
- }
- }
- else {
- $opt->{rootname} = $DefRootName;
- }
-
- if($opt->{xmldecl} and $opt->{xmldecl} eq '1') {
- $opt->{xmldecl} = $DefXmlDecl;
- }
- if(exists($opt->{contentkey})) {
- if($opt->{contentkey} =~ m{^-(.*)$}) {
- $opt->{contentkey} = $1;
- $opt->{collapseagain} = 1;
- }
- }
- else {
- $opt->{contentkey} = $DefContentKey;
- }
- unless(exists($opt->{normalisespace})) {
- $opt->{normalisespace} = $opt->{normalizespace};
- }
- $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace}));
- # Cleanups for values assumed to be arrays later
- if($opt->{searchpath}) {
- unless(ref($opt->{searchpath})) {
- $opt->{searchpath} = [ $opt->{searchpath} ];
- }
- }
- else {
- $opt->{searchpath} = [ ];
- }
- if($opt->{cache} and !ref($opt->{cache})) {
- $opt->{cache} = [ $opt->{cache} ];
- }
- if($opt->{cache}) {
- $_ = lc($_) foreach (@{$opt->{cache}});
- foreach my $scheme (@{$opt->{cache}}) {
- my $method = 'cache_read_' . $scheme;
- croak "Unsupported caching scheme: $scheme"
- unless($self->can($method));
- }
- }
-
- if(exists($opt->{parseropts})) {
- if($^W) {
- carp "Warning: " .
- "'ParserOpts' is deprecated, contact the author if you need it";
- }
- }
- else {
- $opt->{parseropts} = [ ];
- }
-
- # Special cleanup for {forcearray} which could be regex, arrayref or boolean
- # or left to default to 0
- if(exists($opt->{forcearray})) {
- if(ref($opt->{forcearray}) eq 'Regexp') {
- $opt->{forcearray} = [ $opt->{forcearray} ];
- }
- if(ref($opt->{forcearray}) eq 'ARRAY') {
- my @force_list = @{$opt->{forcearray}};
- if(@force_list) {
- $opt->{forcearray} = {};
- foreach my $tag (@force_list) {
- if(ref($tag) eq 'Regexp') {
- push @{$opt->{forcearray}->{_regex}}, $tag;
- }
- else {
- $opt->{forcearray}->{$tag} = 1;
- }
- }
- }
- else {
- $opt->{forcearray} = 0;
- }
- }
- else {
- $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 );
- }
- }
- else {
- if($StrictMode and $dirn eq 'in') {
- croak "No value specified for 'ForceArray' option in call to XML$dirn()";
- }
- $opt->{forcearray} = 0;
- }
- # Special cleanup for {keyattr} which could be arrayref or hashref or left
- # to default to arrayref
- if(exists($opt->{keyattr})) {
- if(ref($opt->{keyattr})) {
- if(ref($opt->{keyattr}) eq 'HASH') {
- # Make a copy so we can mess with it
- $opt->{keyattr} = { %{$opt->{keyattr}} };
-
- # Convert keyattr => { elem => '+attr' }
- # to keyattr => { elem => [ 'attr', '+' ] }
- foreach my $el (keys(%{$opt->{keyattr}})) {
- if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) {
- $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ];
- if($StrictMode and $dirn eq 'in') {
- next if($opt->{forcearray} == 1);
- next if(ref($opt->{forcearray}) eq 'HASH'
- and $opt->{forcearray}->{$el});
- croak "<$el> set in KeyAttr but not in ForceArray";
- }
- }
- else {
- delete($opt->{keyattr}->{$el}); # Never reached (famous last words?)
- }
- }
- }
- else {
- if(@{$opt->{keyattr}} == 0) {
- delete($opt->{keyattr});
- }
- }
- }
- else {
- $opt->{keyattr} = [ $opt->{keyattr} ];
- }
- }
- else {
- if($StrictMode) {
- croak "No value specified for 'KeyAttr' option in call to XML$dirn()";
- }
- $opt->{keyattr} = [ @DefKeyAttr ];
- }
- # Special cleanup for {valueattr} which could be arrayref or hashref
- if(exists($opt->{valueattr})) {
- if(ref($opt->{valueattr}) eq 'ARRAY') {
- $opt->{valueattrlist} = {};
- $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} });
- }
- }
- # make sure there's nothing weird in {grouptags}
- if($opt->{grouptags}) {
- croak "Illegal value for 'GroupTags' option - expected a hashref"
- unless UNIVERSAL::isa($opt->{grouptags}, 'HASH');
- while(my($key, $val) = each %{$opt->{grouptags}}) {
- next if $key ne $val;
- croak "Bad value in GroupTags: '$key' => '$val'";
- }
- }
- # Check the {variables} option is valid and initialise variables hash
- if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) {
- croak "Illegal value for 'Variables' option - expected a hashref";
- }
- if($opt->{variables}) {
- $self->{_var_values} = { %{$opt->{variables}} };
- }
- elsif($opt->{varattr}) {
- $self->{_var_values} = {};
- }
- }
- ##############################################################################
- # Method: find_xml_file()
- #
- # Helper routine for XMLin().
- # Takes a filename, and a list of directories, attempts to locate the file in
- # the directories listed.
- # Returns a full pathname on success; croaks on failure.
- #
- sub find_xml_file {
- my $self = shift;
- my $file = shift;
- my @search_path = @_;
- require File::Basename;
- require File::Spec;
- my($filename, $filedir) = File::Basename::fileparse($file);
- if($filename ne $file) { # Ignore searchpath if dir component
- return($file) if(-e $file);
- }
- else {
- my($path);
- foreach $path (@search_path) {
- my $fullpath = File::Spec->catfile($path, $file);
- return($fullpath) if(-e $fullpath);
- }
- }
- # If user did not supply a search path, default to current directory
- if(!@search_path) {
- return($file) if(-e $file);
- croak "File does not exist: $file";
- }
- croak "Could not find $file in ", join(':', @search_path);
- }
- ##############################################################################
- # Method: collapse()
- #
- # Helper routine for XMLin(). This routine really comprises the 'smarts' (or
- # value add) of this module.
- #
- # Takes the parse tree that XML::Parser produced from the supplied XML and
- # recurses through it 'collapsing' unnecessary levels of indirection (nested
- # arrays etc) to produce a data structure that is easier to work with.
- #
- # Elements in the original parser tree are represented as an element name
- # followed by an arrayref. The first element of the array is a hashref
- # containing the attributes. The rest of the array contains a list of any
- # nested elements as name+arrayref pairs:
- #
- # <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ]
- #
- # The special element name '0' (zero) flags text content.
- #
- # This routine cuts down the noise by discarding any text content consisting of
- # only whitespace and then moves the nested elements into the attribute hash
- # using the name of the nested element as the hash key and the collapsed
- # version of the nested element as the value. Multiple nested elements with
- # the same name will initially be represented as an arrayref, but this may be
- # 'folded' into a hashref depending on the value of the keyattr option.
- #
- sub collapse {
- my $self = shift;
- # Start with the hash of attributes
-
- my $attr = shift;
- if($self->{opt}->{noattr}) { # Discard if 'noattr' set
- $attr = {};
- }
- elsif($self->{opt}->{normalisespace} == 2) {
- while(my($key, $value) = each %$attr) {
- $attr->{$key} = $self->normalise_space($value)
- }
- }
- # Do variable substitutions
- if(my $var = $self->{_var_values}) {
- while(my($key, $val) = each(%$attr)) {
- $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge;
- $attr->{$key} = $val;
- }
- }
- # Roll up 'value' attributes (but only if no nested elements)
- if(!@_ and keys %$attr == 1) {
- my($k) = keys %$attr;
- if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) {
- return $attr->{$k};
- }
- }
- # Add any nested elements
- my($key, $val);
- while(@_) {
- $key = shift;
- $val = shift;
- if(ref($val)) {
- $val = $self->collapse(@$val);
- next if(!defined($val) and $self->{opt}->{suppressempty});
- }
- elsif($key eq '0') {
- next if($val =~ m{^\s*$}s); # Skip all whitespace content
- $val = $self->normalise_space($val)
- if($self->{opt}->{normalisespace} == 2);
- # do variable substitutions
- if(my $var = $self->{_var_values}) {
- $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge;
- }
-
- # look for variable definitions
- if(my $var = $self->{opt}->{varattr}) {
- if(exists $attr->{$var}) {
- $self->set_var($attr->{$var}, $val);
- }
- }
- # Collapse text content in element with no attributes to a string
- if(!%$attr and !@_) {
- return($self->{opt}->{forcecontent} ?
- { $self->{opt}->{contentkey} => $val } : $val
- );
- }
- $key = $self->{opt}->{contentkey};
- }
- # Combine duplicate attributes into arrayref if required
- if(exists($attr->{$key})) {
- if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) {
- push(@{$attr->{$key}}, $val);
- }
- else {
- $attr->{$key} = [ $attr->{$key}, $val ];
- }
- }
- elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) {
- $attr->{$key} = [ $val ];
- }
- else {
- if( $key ne $self->{opt}->{contentkey}
- and (
- ($self->{opt}->{forcearray} == 1)
- or (
- (ref($self->{opt}->{forcearray}) eq 'HASH')
- and (
- $self->{opt}->{forcearray}->{$key}
- or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}})
- )
- )
- )
- ) {
- $attr->{$key} = [ $val ];
- }
- else {
- $attr->{$key} = $val;
- }
- }
- }
- # Turn arrayrefs into hashrefs if key fields present
- if($self->{opt}->{keyattr}) {
- while(($key,$val) = each %$attr) {
- if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) {
- $attr->{$key} = $self->array_to_hash($key, $val);
- }
- }
- }
- # disintermediate grouped tags
- if($self->{opt}->{grouptags}) {
- while(my($key, $val) = each(%$attr)) {
- next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
- next unless(exists($self->{opt}->{grouptags}->{$key}));
- my($child_key, $child_val) = %$val;
- if($self->{opt}->{grouptags}->{$key} eq $child_key) {
- $attr->{$key}= $child_val;
- }
- }
- }
- # Fold hashes containing a single anonymous array up into just the array
- my $count = scalar keys %$attr;
- if($count == 1
- and exists $attr->{anon}
- and UNIVERSAL::isa($attr->{anon}, 'ARRAY')
- ) {
- return($attr->{anon});
- }
- # Do the right thing if hash is empty, otherwise just return it
- if(!%$attr and exists($self->{opt}->{suppressempty})) {
- if(defined($self->{opt}->{suppressempty}) and
- $self->{opt}->{suppressempty} eq '') {
- return('');
- }
- return(undef);
- }
- # Roll up named elements with named nested 'value' attributes
- if($self->{opt}->{valueattr}) {
- while(my($key, $val) = each(%$attr)) {
- next unless($self->{opt}->{valueattr}->{$key});
- next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
- my($k) = keys %$val;
- next unless($k eq $self->{opt}->{valueattr}->{$key});
- $attr->{$key} = $val->{$k};
- }
- }
- return($attr)
- }
- ##############################################################################
- # Method: set_var()
- #
- # Called when a variable definition is encountered in the XML. (A variable
- # definition looks like <element attrname="name">value</element> where attrname
- # matches the varattr setting).
- #
- sub set_var {
- my($self, $name, $value) = @_;
- $self->{_var_values}->{$name} = $value;
- }
- ##############################################################################
- # Method: get_var()
- #
- # Called during variable substitution to get the value for the named variable.
- #
- sub get_var {
- my($self, $name) = @_;
- my $value = $self->{_var_values}->{$name};
- return $value if(defined($value));
- return '${' . $name . '}';
- }
- ##############################################################################
- # Method: normalise_space()
- #
- # Strips leading and trailing whitespace and collapses sequences of whitespace
- # characters to a single space.
- #
- sub normalise_space {
- my($self, $text) = @_;
- $text =~ s/^\s+//s;
- $text =~ s/\s+$//s;
- $text =~ s/\s\s+/ /sg;
- return $text;
- }
- ##############################################################################
- # Method: array_to_hash()
- #
- # Helper routine for collapse().
- # Attempts to 'fold' an array of hashes into an hash of hashes. Returns a
- # reference to the hash on success or the original array if folding is
- # not possible. Behaviour is controlled by 'keyattr' option.
- #
- sub array_to_hash {
- my $self = shift;
- my $name = shift;
- my $arrayref = shift;
- my $hashref = $self->new_hashref;
- my($i, $key, $val, $flag);
- # Handle keyattr => { .... }
- if(ref($self->{opt}->{keyattr}) eq 'HASH') {
- return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name}));
- ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}};
- for($i = 0; $i < @$arrayref; $i++) {
- if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and
- exists($arrayref->[$i]->{$key})
- ) {
- $val = $arrayref->[$i]->{$key};
- if(ref($val)) {
- $self->die_or_warn("<$name> element has non-scalar '$key' key attribute");
- return($arrayref);
- }
- $val = $self->normalise_space($val)
- if($self->{opt}->{normalisespace} == 1);
- $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
- if(exists($hashref->{$val}));
- $hashref->{$val} = { %{$arrayref->[$i]} };
- $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-');
- delete $hashref->{$val}->{$key} unless($flag eq '+');
- }
- else {
- $self->die_or_warn("<$name> element has no '$key' key attribute");
- return($arrayref);
- }
- }
- }
- # Or assume keyattr => [ .... ]
- else {
- my $default_keys =
- join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}});
- ELEMENT: for($i = 0; $i < @$arrayref; $i++) {
- return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH'));
- foreach $key (@{$self->{opt}->{keyattr}}) {
- if(defined($arrayref->[$i]->{$key})) {
- $val = $arrayref->[$i]->{$key};
- if(ref($val)) {
- $self->die_or_warn("<$name> element has non-scalar '$key' key attribute")
- if not $default_keys;
- return($arrayref);
- }
- $val = $self->normalise_space($val)
- if($self->{opt}->{normalisespace} == 1);
- $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
- if(exists($hashref->{$val}));
- $hashref->{$val} = { %{$arrayref->[$i]} };
- delete $hashref->{$val}->{$key};
- next ELEMENT;
- }
- }
- return($arrayref); # No keyfield matched
- }
- }
-
- # collapse any hashes which now only have a 'content' key
- if($self->{opt}->{collapseagain}) {
- $hashref = $self->collapse_content($hashref);
- }
-
- return($hashref);
- }
- ##############################################################################
- # Method: die_or_warn()
- #
- # Takes a diagnostic message and does one of three things:
- # 1. dies if strict mode is enabled
- # 2. warns if warnings are enabled but strict mode is not
- # 3. ignores message and resturns silently if neither strict mode nor warnings
- # are enabled
- #
- sub die_or_warn {
- my $self = shift;
- my $msg = shift;
- croak $msg if($StrictMode);
- carp "Warning: $msg" if($^W);
- }
- ##############################################################################
- # Method: new_hashref()
- #
- # This is a hook routine for overriding in a sub-class. Some people believe
- # that using Tie::IxHash here will solve order-loss problems.
- #
- sub new_hashref {
- my $self = shift;
- return { @_ };
- }
- ##############################################################################
- # Method: collapse_content()
- #
- # Helper routine for array_to_hash
- #
- # Arguments expected are:
- # - an XML::Simple object
- # - a hasref
- # the hashref is a former array, turned into a hash by array_to_hash because
- # of the presence of key attributes
- # at this point collapse_content avoids over-complicated structures like
- # dir => { libexecdir => { content => '$exec_prefix/libexec' },
- # localstatedir => { content => '$prefix' },
- # }
- # into
- # dir => { libexecdir => '$exec_prefix/libexec',
- # localstatedir => '$prefix',
- # }
- sub collapse_content {
- my $self = shift;
- my $hashref = shift;
- my $contentkey = $self->{opt}->{contentkey};
- # first go through the values,checking that they are fit to collapse
- foreach my $val (values %$hashref) {
- return $hashref unless ( (ref($val) eq 'HASH')
- and (keys %$val == 1)
- and (exists $val->{$contentkey})
- );
- }
- # now collapse them
- foreach my $key (keys %$hashref) {
- $hashref->{$key}= $hashref->{$key}->{$contentkey};
- }
- return $hashref;
- }
-
- ##############################################################################
- # Method: value_to_xml()
- #
- # Helper routine for XMLout() - recurses through a data structure building up
- # and returning an XML representation of that structure as a string.
- #
- # Arguments expected are:
- # - the data structure to be encoded (usually a reference)
- # - the XML tag name to use for this item
- # - a string of spaces for use as the current indent level
- #
- sub value_to_xml {
- my $self = shift;;
- # Grab the other arguments
- my($ref, $name, $indent) = @_;
- my $named = (defined($name) and $name ne '' ? 1 : 0);
- my $nl = "\n";
- my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack!
- if($self->{opt}->{noindent}) {
- $indent = '';
- $nl = '';
- }
- # Convert to XML
-
- if(ref($ref)) {
- croak "circular data structures not supported"
- if(grep($_ == $ref, @{$self->{_ancestors}}));
- push @{$self->{_ancestors}}, $ref;
- }
- else {
- if($named) {
- return(join('',
- $indent, '<', $name, '>',
- ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)),
- '</', $name, ">", $nl
- ));
- }
- else {
- return("$ref$nl");
- }
- }
- # Unfold hash to array if possible
- if(UNIVERSAL::isa($ref, 'HASH') # It is a hash
- and keys %$ref # and it's not empty
- and $self->{opt}->{keyattr} # and folding is enabled
- and !$is_root # and its not the root element
- ) {
- $ref = $self->hash_to_array($name, $ref);
- }
- my @result = ();
- my($key, $value);
- # Handle hashrefs
- if(UNIVERSAL::isa($ref, 'HASH')) {
- # Reintermediate grouped values if applicable
- if($self->{opt}->{grouptags}) {
- $ref = $self->copy_hash($ref);
- while(my($key, $val) = each %$ref) {
- if($self->{opt}->{grouptags}->{$key}) {
- $ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val };
- }
- }
- }
- # Scan for namespace declaration attributes
- my $nsdecls = '';
- my $default_ns_uri;
- if($self->{nsup}) {
- $ref = $self->copy_hash($ref);
- $self->{nsup}->push_context();
- # Look for default namespace declaration first
- if(exists($ref->{xmlns})) {
- $self->{nsup}->declare_prefix('', $ref->{xmlns});
- $nsdecls .= qq( xmlns="$ref->{xmlns}");
- delete($ref->{xmlns});
- }
- $default_ns_uri = $self->{nsup}->get_uri('');
- # Then check all the other keys
- foreach my $qname (keys(%$ref)) {
- my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
- if($uri) {
- if($uri eq $xmlns_ns) {
- $self->{nsup}->declare_prefix($lname, $ref->{$qname});
- $nsdecls .= qq( xmlns:$lname="$ref->{$qname}");
- delete($ref->{$qname});
- }
- }
- }
- # Translate any remaining Clarkian names
- foreach my $qname (keys(%$ref)) {
- my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
- if($uri) {
- if($default_ns_uri and $uri eq $default_ns_uri) {
- $ref->{$lname} = $ref->{$qname};
- delete($ref->{$qname});
- }
- else {
- my $prefix = $self->{nsup}->get_prefix($uri);
- unless($prefix) {
- # $self->{nsup}->declare_prefix(undef, $uri);
- # $prefix = $self->{nsup}->get_prefix($uri);
- $prefix = $self->{ns_prefix}++;
- $self->{nsup}->declare_prefix($prefix, $uri);
- $nsdecls .= qq( xmlns:$prefix="$uri");
- }
- $ref->{"$prefix:$lname"} = $ref->{$qname};
- delete($ref->{$qname});
- }
- }
- }
- }
- my @nested = ();
- my $text_content = undef;
- if($named) {
- push @result, $indent, '<', $name, $nsdecls;
- }
- if(keys %$ref) {
- my $first_arg = 1;
- foreach my $key ($self->sorted_keys($name, $ref)) {
- my $value = $ref->{$key};
- next if(substr($key, 0, 1) eq '-');
- if(!defined($value)) {
- next if $self->{opt}->{suppressempty};
- unless(exists($self->{opt}->{suppressempty})
- and !defined($self->{opt}->{suppressempty})
- ) {
- carp 'Use of uninitialized value' if($^W);
- }
- if($key eq $self->{opt}->{contentkey}) {
- $text_content = '';
- }
- else {
- $value = exists($self->{opt}->{suppressempty}) ? {} : '';
- }
- }
- if(!ref($value)
- and $self->{opt}->{valueattr}
- and $self->{opt}->{valueattr}->{$key}
- ) {
- $value = { $self->{opt}->{valueattr}->{$key} => $value };
- }
- if(ref($value) or $self->{opt}->{noattr}) {
- push @nested,
- $self->value_to_xml($value, $key, "$indent ");
- }
- else {
- $value = $self->escape_value($value) unless($self->{opt}->{noescape});
- if($key eq $self->{opt}->{contentkey}) {
- $text_content = $value;
- }
- else {
- push @result, "\n$indent " . ' ' x length($name)
- if($self->{opt}->{attrindent} and !$first_arg);
- push @result, ' ', $key, '="', $value , '"';
- $first_arg = 0;
- }
- }
- }
- }
- else {
- $text_content = '';
- }
- if(@nested or defined($text_content)) {
- if($named) {
- push @result, ">";
- if(defined($text_content)) {
- push @result, $text_content;
- $nested[0] =~ s/^\s+// if(@nested);
- }
- else {
- push @result, $nl;
- }
- if(@nested) {
- push @result, @nested, $indent;
- }
- push @result, '</', $name, ">", $nl;
- }
- else {
- push @result, @nested; # Special case if no root elements
- }
- }
- else {
- push @result, " />", $nl;
- }
- $self->{nsup}->pop_context() if($self->{nsup});
- }
- # Handle arrayrefs
- elsif(UNIVERSAL::isa($ref, 'ARRAY')) {
- foreach $value (@$ref) {
- next if !defined($value) and $self->{opt}->{suppressempty};
- if(!ref($value)) {
- push @result,
- $indent, '<', $name, '>',
- ($self->{opt}->{noescape} ? $value : $self->escape_value($value)),
- '</', $name, ">$nl";
- }
- elsif(UNIVERSAL::isa($value, 'HASH')) {
- push @result, $self->value_to_xml($value, $name, $indent);
- }
- else {
- push @result,
- $indent, '<', $name, ">$nl",
- $self->value_to_xml($value, 'anon', "$indent "),
- $indent, '</', $name, ">$nl";
- }
- }
- }
- else {
- croak "Can't encode a value of type: " . ref($ref);
- }
- pop @{$self->{_ancestors}} if(ref($ref));
- return(join('', @result));
- }
- ##############################################################################
- # Method: sorted_keys()
- #
- # Returns the keys of the referenced hash sorted into alphabetical order, but
- # with the 'key' key (as in KeyAttr) first, if there is one.
- #
- sub sorted_keys {
- my($self, $name, $ref) = @_;
- return keys %$ref if $self->{opt}->{nosort};
- my %hash = %$ref;
- my $keyattr = $self->{opt}->{keyattr};
- my @key;
- if(ref $keyattr eq 'HASH') {
- if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) {
- push @key, $keyattr->{$name}->[0];
- delete $hash{$keyattr->{$name}->[0]};
- }
- }
- elsif(ref $keyattr eq 'ARRAY') {
- foreach (@{$keyattr}) {
- if(exists $hash{$_}) {
- push @key, $_;
- delete $hash{$_};
- last;
- }
- }
- }
- return(@key, sort keys %hash);
- }
- ##############################################################################
- # Method: escape_value()
- #
- # Helper routine for automatically escaping values for XMLout().
- # Expects a scalar data value. Returns escaped version.
- #
- sub escape_value {
- my($self, $data) = @_;
- return '' unless(defined($data));
- $data =~ s/&/&/sg;
- $data =~ s/</</sg;
- $data =~ s/>/>/sg;
- $data =~ s/"/"/sg;
- my $level = $self->{opt}->{numericescape} or return $data;
- return $self->numeric_escape($data, $level);
- }
- sub numeric_escape {
- my($self, $data, $level) = @_;
- use utf8; # required for 5.6
- if($self->{opt}->{numericescape} eq '2') {
- $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse;
- }
- else {
- $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse;
- }
- return $data;
- }
- ##############################################################################
- # Method: hash_to_array()
- #
- # Helper routine for value_to_xml().
- # Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a
- # reference to the array on success or the original hash if unfolding is
- # not possible.
- #
- sub hash_to_array {
- my $self = shift;
- my $parent = shift;
- my $hashref = shift;
- my $arrayref = [];
- my($key, $value);
- my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref;
- foreach $key (@keys) {
- $value = $hashref->{$key};
- return($hashref) unless(UNIVERSAL::isa($value, 'HASH'));
- if(ref($self->{opt}->{keyattr}) eq 'HASH') {
- return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent}));
- push @$arrayref, $self->copy_hash(
- $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key
- );
- }
- else {
- push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value });
- }
- }
- return($arrayref);
- }
- ##############################################################################
- # Method: copy_hash()
- #
- # Helper routine for hash_to_array(). When unfolding a hash of hashes into
- # an array of hashes, we need to copy the key from the outer hash into the
- # inner hash. This routine makes a copy of the original hash so we don't
- # destroy the original data structure. You might wish to override this
- # method if you're using tied hashes and don't want them to get untied.
- #
- sub copy_hash {
- my($self, $orig, @extra) = @_;
- return { @extra, %$orig };
- }
- ##############################################################################
- # Methods required for building trees from SAX events
- ##############################################################################
- sub start_document {
- my $self = shift;
- $self->handle_options('in') unless($self->{opt});
- $self->{lists} = [];
- $self->{curlist} = $self->{tree} = [];
- }
- sub start_element {
- my $self = shift;
- my $element = shift;
- my $name = $element->{Name};
- if($self->{opt}->{nsexpand}) {
- $name = $element->{LocalName} || '';
- if($element->{NamespaceURI}) {
- $name = '{' . $element->{NamespaceURI} . '}' . $name;
- }
- }
- my $attributes = {};
- if($element->{Attributes}) { # Might be undef
- foreach my $attr (values %{$element->{Attributes}}) {
- if($self->{opt}->{nsexpand}) {
- my $name = $attr->{LocalName} || '';
- if($attr->{NamespaceURI}) {
- $name = '{' . $attr->{NamespaceURI} . '}' . $name
- }
- $name = 'xmlns' if($name eq $bad_def_ns_jcn);
- $attributes->{$name} = $attr->{Value};
- }
- else {
- $attributes->{$attr->{Name}} = $attr->{Value};
- }
- }
- }
- my $newlist = [ $attributes ];
- push @{ $self->{lists} }, $self->{curlist};
- push @{ $self->{curlist} }, $name => $newlist;
- $self->{curlist} = $newlist;
- }
- sub characters {
- my $self = shift;
- my $chars = shift;
- my $text = $chars->{Data};
- my $clist = $self->{curlist};
- my $pos = $#$clist;
-
- if ($pos > 0 and $clist->[$pos - 1] eq '0') {
- $clist->[$pos] .= $text;
- }
- else {
- push @$clist, 0 => $text;
- }
- }
- sub end_element {
- my $self = shift;
- $self->{curlist} = pop @{ $self->{lists} };
- }
- sub end_document {
- my $self = shift;
- delete($self->{curlist});
- delete($self->{lists});
- my $tree = $self->{tree};
- delete($self->{tree});
- # Return tree as-is to XMLin()
- return($tree) if($self->{nocollapse});
- # Or collapse it before returning it to SAX parser class
-
- if($self->{opt}->{keeproot}) {
- $tree = $self->collapse({}, @$tree);
- }
- else {
- $tree = $self->collapse(@{$tree->[1]});
- }
- if($self->{opt}->{datahandler}) {
- return($self->{opt}->{datahandler}->($self, $tree));
- }
- return($tree);
- }
- #*xml_in = \&XMLin;
- #*xml_out = \&XMLout;
- 1;
- __END__
- =head1 QUICK START
- Say you have a script called B<foo> and a file of configuration options
- called B<foo.xml> containing this:
- <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug">
- <server name="sahara" osname="solaris" osversion="2.6">
- <address>10.0.0.101</address>
- <address>10.0.1.101</address>
- </server>
- <server name="gobi" osname="irix" osversion="6.5">
- <address>10.0.0.102</address>
- </server>
- <server name="kalahari" osname="linux" osversion="2.0.34">
- <address>10.0.0.103</address>
- <address>10.0.1.103</address>
- </server>
- </config>
- The following lines of code in B<foo>:
- use XML::Simple;
- my $config = XMLin();
- will 'slurp' the configuration options into the hashref $config (because no
- arguments are passed to C<XMLin()> the name and location of the XML file will
- be inferred from name and location of the script). You can dump out the
- contents of the hashref using Data::Dumper:
- use Data::Dumper;
- print Dumper($config);
- which will produce something like this (formatting has been adjusted for
- brevity):
- {
- 'logdir' => '/var/log/foo/',
- 'debugfile' => '/tmp/foo.debug',
- 'server' => {
- 'sahara' => {
- 'osversion' => '2.6',
- 'osname' => 'solaris',
- 'address' => [ '10.0.0.101', '10.0.1.101' ]
- },
- 'gobi' => {
- 'osversion' => '6.5',
- 'osname' => 'irix',
- 'address' => '10.0.0.102'
- },
- 'kalahari' => {
- 'osversion' => '2.0.34',
- 'osname' => 'linux',
- 'address' => [ '10.0.0.103', '10.0.1.103' ]
- }
- }
- }
- Your script could then access the name of the log directory like this:
- print $config->{logdir};
- similarly, the second address on the server 'kalahari' could be referenced as:
- print $config->{server}->{kalahari}->{address}->[1];
- What could be simpler? (Rhetorical).
- For simple requirements, that's really all there is to it. If you want to
- store your XML in a different directory or file, or pass it in as a string or
- even pass it in via some derivative of an IO::Handle, you'll need to check out
- L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that
- neat little transformation that produced $config->{server}) you'll find options
- for that as well.
- If you want to generate XML (for example to write a modified version of
- $config back out as XML), check out C<XMLout()>.
- If your needs are not so simple, this may not be the module for you. In that
- case, you might want to read L<"WHERE TO FROM HERE?">.
- =head1 DESCRIPTION
- The XML::Simple module provides a simple API layer on top of an underlying XML
- parsing module (either XML::Parser or one of the SAX2 parser modules). Two
- functions are exported: C<XMLin()> and C<XMLout()>. Note: you can explicity
- request the lower case versions of the function names: C<xml_in()> and
- C<xml_out()>.
- The simplest approach is to call these two functions directly, but an
- optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below)
- allows them to be called as methods of an B<XML::Simple> object. The object
- interface can also be used at either end of a SAX pipeline.
- =head2 XMLin()
- Parses XML formatted data and returns a reference to a data structure which
- contains the same information in a more readily accessible form. (Skip
- down to L<"EXAMPLES"> below, for more sample code).
- C<XMLin()> accepts an optional XML specifier followed by zero or more 'name =>
- value' option pairs. The XML specifier can be one of the following:
- =over 4
- =item A filename
- If the filename contains no directory components C<XMLin()> will look for the
- file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the
- current directory if the SearchPath option is not defined. eg:
- $ref = XMLin('/etc/params.xml');
- Note, the filename '-' can be used to parse from STDIN.
- =item undef
- If there is no XML specifier, C<XMLin()> will check the script directory and
- each of the SearchPath directories for a file with the same name as the script
- but with the extension '.xml'. Note: if you wish to specify options, you
- must specify the value 'undef'. eg:
- $ref = XMLin(undef, ForceArray => 1);
- =item A string of XML
- A string containing XML (recognised by the presence of '<' and '>' characters)
- will be parsed directly. eg:
- $ref = XMLin('<opt username="bob" password="flurp" />');
- =item An IO::Handle object
- An IO::Handle object will be read to EOF and its contents parsed. eg:
- $fh = IO::File->new('/etc/params.xml');
- $ref = XMLin($fh);
- =back
- =head2 XMLout()
- Takes a data structure (generally a hashref) and returns an XML encoding of
- that structure. If the resulting XML is parsed using C<XMLin()>, it should
- return a data structure equivalent to the original (see caveats below).
- The C<XMLout()> function can also be used to output the XML as SAX events
- see the C<Handler> option and L<"SAX SUPPORT"> for more details).
- When translating hashes to XML, hash keys which have a leading '-' will be
- silently skipped. This is the approved method for marking elements of a
- data structure which should be ignored by C<XMLout>. (Note: If these items
- were not skipped the key names would be emitted as element or attribute names
- with a leading '-' which would not be valid XML).
- =head2 Caveats
- Some care is required in creating data structures which will be passed to
- C<XMLout()>. Hash keys from the data structure will be encoded as either XML
- element names or attribute names. Therefore, you should use hash key names
- which conform to the relatively strict XML naming rules:
- Names in XML must begin with a letter. The remaining characters may be
- letters, digits, hyphens (-), underscores (_) or full stops (.). It is also
- allowable to include one colon (:) in an element name but this should only be
- used when working with namespaces (B<XML::Simple> can only usefully work with
- namespaces when teamed with a SAX Parser).
- You can use other punctuation characters in hash values (just not in hash
- keys) however B<XML::Simple> does not support dumping binary data.
- If you break these rules, the current implementation of C<XMLout()> will
- simply emit non-compliant XML which will be rejected if you try to read it
- back in. (A later version of B<XML::Simple> might take a more proactive
- approach).
- Note also that although you can nest hashes and arrays to arbitrary levels,
- circular data structures are not supported and will cause C<XMLout()> to die.
- If you wish to 'round-trip' arbitrary data structures from Perl to XML and back
- to Perl, then you should probably disable array folding (using the KeyAttr
- option) both with C<XMLout()> and with C<XMLin()>. If you still don't get the
- expected results, you may prefer to use L<XML::Dumper> which is designed for
- exactly that purpose.
- Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs.
- =head1 OPTIONS
- B<XML::Simple> supports a number of options (in fact as each release of
- B<XML::Simple> adds more options, the module's claim to the name 'Simple'
- becomes increasingly tenuous). If you find yourself repeatedly having to
- specify the same options, you might like to investigate L<"OPTIONAL OO
- INTERFACE"> below.
- If you can't be bothered reading the documentation, refer to
- L<"STRICT MODE"> to automatically catch common mistakes.
- Because there are so many options, it's hard for new users to know which ones
- are important, so here are the two you really need to know about:
- =over 4
- =item *
- check out C<ForceArray> because you'll almost certainly want to turn it on
- =item *
- make sure you know what the C<KeyAttr> option does and what its default value is
- because it may surprise you otherwise (note in particular that 'KeyAttr'
- affects both C<XMLin> and C<XMLout>)
- =back
- The option name headings below have a trailing 'comment' - a hash followed by
- two pieces of metadata:
- =over 4
- =item *
- Options are marked with 'I<in>' if they are recognised by C<XMLin()> and
- 'I<out>' if they are recognised by C<XMLout()>.
- =item *
- Each option is also flagged to indicate whether it is:
- 'important' - don't use the module until you understand this one
- 'handy' - you can skip this on the first time through
- 'advanced' - you can skip this on the second time through
- 'SAX only' - don't worry about this unless you're using SAX (or
- alternatively if you need this, you also need SAX)
- 'seldom used' - you'll probably never use this unless you were the
- person that requested the feature
- =back
- The options are listed alphabetically:
- Note: option names are no longer case sensitive so you can use the mixed case
- versions shown here; all lower case as required by versions 2.03 and earlier;
- or you can add underscores between the words (eg: key_attr).
- =head2 AttrIndent => 1 I<# out - handy>
- When you are using C<XMLout()>, enable this option to have attributes printed
- one-per-line with sensible indentation rather than all on one line.
- =head2 Cache => [ cache schemes ] I<# in - advanced>
- Because loading the B<XML::Parser> module and parsing an XML file can consume a
- significant number of CPU cycles, it is often desirable to cache the output of
- C<XMLin()> for later reuse.
- When parsing from a named file, B<XML::Simple> supports a number of caching
- schemes. The 'Cache' option may be used to specify one or more schemes (using
- an anonymous array). Each scheme will be tried in turn in the hope of finding
- a cached pre-parsed representation of the XML file. If no cached copy is
- found, the file will be parsed and the first cache scheme in the list will be
- used to save a copy of the results. The following cache schemes have been
- implemented:
- =over 4
- =item storable
- Utilises B<Storable.pm> to read/write a cache file with the same name as the
- XML file but with the extension .stor
- =item memshare
- When a file is first parsed, a copy of the resulting data structure is retained
- in memory in the B<XML::Simple> module's namespace. Subsequent calls to parse
- the same file will return a reference to this structure. This cached version
- will persist only for the life of the Perl interpreter (which in the case of
- mod_perl for example, may be some significant time).
- Because each caller receives a reference to the same data structure, a change
- made by one caller will be visible to all. For this reason, the reference
- returned should be treated as read-only.
- =item memcopy
- This scheme works identically to 'memshare' (above) except that each caller
- receives a reference to a new data structure which is a copy of the cached
- version. Copying the data structure will add a little processing overhead,
- therefore this scheme should only be used where the caller intends to modify
- the data structure (or wishes to protect itself from others who might). This
- scheme uses B<Storable.pm> to perform the copy.
- =back
- Warning! The memory-based caching schemes compare the timestamp on the file to
- the time when it was last parsed. If the file is stored on an NFS filesystem
- (or other network share) and the clock on the file server is not exactly
- synchronised with the clock where your script is run, updates to the source XML
- file may appear to be ignored.
- =head2 ContentKey => 'keyname' I<# in+out - seldom used>
- When text content is parsed to a hash value, this option let's you specify a
- name for the hash key to override the default 'content'. So for example:
- XMLin('<opt one="1">Text</opt>', ContentKey => 'text')
- will parse to:
- { 'one' => 1, 'text' => 'Text' }
- instead of:
- { 'one' => 1, 'content' => 'Text' }
- C<XMLout()> will also honour the value of this option when converting a hashref
- to XML.
- You can also prefix your selected key name with a '-' character to have
- C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after
- array folding. For example:
- XMLin(
- '<opt><item name="one">First</item><item name="two">Second</item></opt>',
- KeyAttr => {item => 'name'},
- ForceArray => [ 'item' ],
- ContentKey => '-content'
- )
- will parse to:
- {
- 'item' => {
- 'one' => 'First'
- 'two' => 'Second'
- }
- }
- rather than this (without the '-'):
- {
- 'item' => {
- 'one' => { 'content' => 'First' }
- 'two' => { 'content' => 'Second' }
- }
- }
- =head2 DataHandler => code_ref I<# in - SAX only>
- When you use an B<XML::Simple> object as a SAX handler, it will return a
- 'simple tree' data structure in the same format as C<XMLin()> would return. If
- this option is set (to a subroutine reference), then when the tree is built the
- subroutine will be called and passed two arguments: a reference to the
- B<XML::Simple> object and a reference to the data tree. The return value from
- the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for
- more details).
- =head2 ForceArray => 1 I<# in - important>
- This option should be set to '1' to force nested elements to be represented
- as arrays even when there is only one. Eg, with ForceArray enabled, this
- XML:
- <opt>
- <name>value</name>
- </opt>
- would parse to this:
- {
- 'name' => [
- 'value'
- ]
- }
- instead of this (the default):
- {
- 'name' => 'value'
- }
- This option is especially useful if the data structure is likely to be written
- back out as XML and the default behaviour of rolling single nested elements up
- into attributes is not desirable.
- If you are using the array folding feature, you should almost certainly enable
- this option. If you do not, single nested elements will not be parsed to
- arrays and therefore will not be candidates for folding to a hash. (Given that
- the default value of 'KeyAttr' enables array folding, the default value of this
- option should probably also have been enabled too - sorry).
- =head2 ForceArray => [ names ] I<# in - important>
- This alternative (and preferred) form of the 'ForceArray' option allows you to
- specify a list of element names which should always be forced into an array
- representation, rather than the 'all or nothing' approach above.
- It is also possible (since version 2.05) to include compiled regular
- expressions in the list - any element names which match the pattern will be
- forced to arrays. If the list contains only a single regex, then it is not
- necessary to enclose it in an arrayref. Eg:
- ForceArray => qr/_list$/
- =head2 ForceContent => 1 I<# in - seldom used>
- When C<XMLin()> parses elements which have text content as well as attributes,
- the text content must be represented as a hash value rather than a simple
- scalar. This option allows you to force text content to always parse to
- a hash value even when there are no attributes. So for example:
- XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1)
- will parse to:
- {
- 'x' => { 'content' => 'text1' },
- 'y' => { 'a' => 2, 'content' => 'text2' }
- }
- instead of:
- {
- 'x' => 'text1',
- 'y' => { 'a' => 2, 'content' => 'text2' }
- }
- =head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy>
- You can use this option to eliminate extra levels of indirection in your Perl
- data structure. For example this XML:
- <opt>
- <searchpath>
- <dir>/usr/bin</dir>
- <dir>/usr/local/bin</dir>
- <dir>/usr/X11/bin</dir>
- </searchpath>
- </opt>
- Would normally be read into a structure like this:
- {
- searchpath => {
- dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
- }
- }
- But when read in with the appropriate value for 'GroupTags':
- my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' });
- It will return this simpler structure:
- {
- searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
- }
- The grouping element (C<< <searchpath> >> in the example) must not contain any
- attributes or elements other than the grouped element.
- You can specify multiple 'grouping element' to 'grouped element' mappings in
- the same hashref. If this option is combined with C<KeyAttr>, the array
- folding will occur first and then the grouped element names will be eliminated.
- C<XMLout> will also use the grouptag mappings to re-introduce the tags around
- the grouped elements. Beware though that this will occur in all places that
- the 'grouping tag' name occurs - you probably don't want to use the same name
- for elements as well as attributes.
- =head2 Handler => object_ref I<# out - SAX only>
- Use the 'Handler' option to have C<XMLout()> generate SAX events rather than
- returning a string of XML. For more details see L<"SAX SUPPORT"> below.
- Note: the current implementation of this option generates a string of XML
- and uses a SAX parser to translate it into SAX events. The normal encoding
- rules apply here - your data must be UTF8 encoded unless you specify an
- alternative encoding via the 'XMLDecl' option; and by the time the data reaches
- the handler object, it will be in UTF8 form regardless of the encoding you
- supply. A future implementation of this option may generate the events
- directly.
- =head2 KeepRoot => 1 I<# in+out - handy>
- In its attempt to return a data structure free of superfluous detail and
- unnecessary levels of indirection, C<XMLin()> normally discards the root
- element name. Setting the 'KeepRoot' option to '1' will cause the root element
- name to be retained. So after executing this code:
- $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1)
- You'll be able to reference the tempdir as
- C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default
- C<$config-E<gt>{tempdir}>.
- Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the
- data structure already contains a root element name and it is not necessary to
- add another.
- =head2 KeyAttr => [ list ] I<# in+out - important>
- This option controls the 'array folding' feature which translates nested
- elements from an array to a hash. It also controls the 'unfolding' of hashes
- to arrays.
- For example, this XML:
- <opt>
- <user login="grep" fullname="Gary R Epstein" />
- <user login="stty" fullname="Simon T Tyson" />
- </opt>
- would, by default, parse to this:
- {
- 'user' => [
- {
- 'login' => 'grep',
- 'fullname' => 'Gary R Epstein'
- },
- {
- 'login' => 'stty',
- 'fullname' => 'Simon T Tyson'
- }
- ]
- }
- If the option 'KeyAttr => "login"' were used to specify that the 'login'
- attribute is a key, the same XML would parse to:
- {
- 'user' => {
- 'stty' => {
- 'fullname' => 'Simon T Tyson'
- },
- 'grep' => {
- 'fullname' => 'Gary R Epstein'
- }
- }
- }
- The key attribute names should be supplied in an arrayref if there is more
- than one. C<XMLin()> will attempt to match attribute names in the order
- supplied. C<XMLout()> will use the first attribute name supplied when
- 'unfolding' a hash into an array.
- Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do
- not want folding on input or unfolding on output you must setting this option
- to an empty list to disable the feature.
- Note 2: If you wish to use this option, you should also enable the
- C<ForceArray> option. Without 'ForceArray', a single nested element will be
- rolled up into a scalar rather than an array and therefore will not be folded
- (since only arrays get folded).
- =head2 KeyAttr => { list } I<# in+out - important>
- This alternative (and preferred) method of specifiying the key attributes
- allows more fine grained control over which elements are folded and on which
- attributes. For example the option 'KeyAttr => { package => 'id' } will cause
- any package elements to be folded on the 'id' attribute. No other elements
- which have an 'id' attribute will be folded at all.
- Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">)
- if this syntax is used and an element which does not have the specified key
- attribute is encountered (eg: a 'package' element without an 'id' attribute, to
- use the example above). Warnings will only be generated if B<-w> is in force.
- Two further variations are made possible by prefixing a '+' or a '-' character
- to the attribute name:
- The option 'KeyAttr => { user => "+login" }' will cause this XML:
- <opt>
- <user login="grep" fullname="Gary R Epstein" />
- <user login="stty" fullname="Simon T Tyson" />
- </opt>
- to parse to this data structure:
- {
- 'user' => {
- 'stty' => {
- 'fullname' => 'Simon T Tyson',
- 'login' => 'stty'
- },
- 'grep' => {
- 'fullname' => 'Gary R Epstein',
- 'login' => 'grep'
- }
- }
- }
- The '+' indicates that the value of the key attribute should be copied rather
- than moved to the folded hash key.
- A '-' prefix would produce this result:
- {
- 'user' => {
- 'stty' => {
- 'fullname' => 'Simon T Tyson',
- '-login' => 'stty'
- },
- 'grep' => {
- 'fullname' => 'Gary R Epstein',
- '-login' => 'grep'
- }
- }
- }
- As described earlier, C<XMLout> will ignore hash keys starting with a '-'.
- =head2 NoAttr => 1 I<# in+out - handy>
- When used with C<XMLout()>, the generated XML will contain no attributes.
- All hash key/values will be represented as nested elements instead.
- When used with C<XMLin()>, any attributes in the XML will be ignored.
- =head2 NoEscape => 1 I<# out - seldom used>
- By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and
- '"' to '<', '>', '&' and '"' respectively. Use this option to
- suppress escaping (presumably because you've already escaped the data in some
- more sophisticated manner).
- =head2 NoIndent => 1 I<# out - seldom used>
- Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode.
- With this option enabled, the XML output will all be on one line (unless there
- are newlines in the data) - this may be easier for downstream processing.
- =head2 NoSort => 1 I<# out - seldom used>
- Newer versions of XML::Simple sort elements and attributes alphabetically (*),
- by default. Enable this option to suppress the sorting - possibly for
- backwards compatibility.
- * Actually, sorting is alphabetical but 'key' attribute or element names (as in
- 'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements
- are sorted alphabetically by the value of the key field.
- =head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy>
- This option controls how whitespace in text content is handled. Recognised
- values for the option are:
- =over 4
- =item *
- 0 = (default) whitespace is passed through unaltered (except of course for the
- normalisation of whitespace in attribute values which is mandated by the XML
- recommendation)
- =item *
- 1 = whitespace is normalised in any value used as a hash key (normalising means
- removing leading and trailing whitespace and collapsing sequences of whitespace
- characters to a single space)
- =item *
- 2 = whitespace is normalised in all text content
- =back
- Note: you can spell this option with a 'z' if that is more natural for you.
- =head2 NSExpand => 1 I<# in+out handy - SAX only>
- This option controls namespace expansion - the translation of element and
- attribute names of the form 'prefix:name' to '{uri}name'. For example the
- element name 'xsl:template' might be expanded to:
- '{http://www.w3.org/1999/XSL/Transform}template'.
- By default, C<XMLin()> will return element names and attribute names exactly as
- they appear in the XML. Setting this option to 1 will cause all element and
- attribute names to be expanded to include their namespace prefix.
- I<Note: You must be using a SAX parser for this option to work (ie: it does not
- work with XML::Parser)>.
- This option also controls whether C<XMLout()> performs the reverse translation
- from '{uri}name' back to 'prefix:name'. The default is no translation. If
- your data contains expanded names, you should set this option to 1 otherwise
- C<XMLout> will emit XML which is not well formed.
- I<Note: You must have the XML::NamespaceSupport module installed if you want
- C<XMLout()> to translate URIs back to prefixes>.
- =head2 NumericEscape => 0 | 1 | 2 I<# out - handy>
- Use this option to have 'high' (non-ASCII) characters in your Perl data
- structure converted to numeric entities (eg: €) in the XML output. Three
- levels are possible:
- 0 - default: no numeric escaping (OK if you're writing out UTF8)
- 1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output
- 2 - all characters above 0x7F are escaped (good for plain ASCII output)
- =head2 OutputFile => <file specifier> I<# out - handy>
- The default behaviour of C<XMLout()> is to return the XML as a string. If you
- wish to write the XML to a file, simply supply the filename using the
- 'OutputFile' option.
- This option also accepts an IO handle object - especially useful in Perl 5.8.0
- and later for output using an encoding other than UTF-8, eg:
- open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!";
- XMLout($ref, OutputFile => $fh);
- Note, XML::Simple does not require that the object you pass in to the
- OutputFile option inherits from L<IO::Handle> - it simply assumes the object
- supports a C<print> method.
- =head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this>
- I<Note: This option is now officially deprecated. If you find it useful, email
- the author with an example of what you use it for. Do not use this option to
- set the ProtocolEncoding, that's just plain wrong - fix the XML>.
- This option allows you to pass parameters to the constructor of the underlying
- XML::Parser object (which of course assumes you're not using SAX).
- =head2 RootName => 'string' I<# out - handy>
- By default, when C<XMLout()> generates XML, the root element will be named
- 'opt'. This option allows you to specify an alternative name.
- Specifying either undef or the empty string for the RootName option will
- produce XML with no root elements. In most cases the resulting XML fragment
- will not be 'well formed' and therefore could not be read back in by C<XMLin()>.
- Nevertheless, the option has been found to be useful in certain circumstances.
- =head2 SearchPath => [ list ] I<# in - handy>
- If you pass C<XMLin()> a filename, but the filename include no directory
- component, you can use this option to specify which directories should be
- searched to locate the file. You might use this option to search first in the
- user's home directory, then in a global directory such as /etc.
- If a filename is provided to C<XMLin()> but SearchPath is not defined, the
- file is assumed to be in the current directory.
- If the first parameter to C<XMLin()> is undefined, the default SearchPath
- will contain only the directory in which the script itself is located.
- Otherwise the default SearchPath will be empty.
- =head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy>
- This option controls what C<XMLin()> should do with empty elements (no
- attributes and no content). The default behaviour is to represent them as
- empty hashes. Setting this option to a true value (eg: 1) will cause empty
- elements to be skipped altogether. Setting the option to 'undef' or the empty
- string will cause empty elements to be represented as the undefined value or
- the empty string respectively. The latter two alternatives are a little
- easier to test for in your code than a hash with no keys.
- The option also controls what C<XMLout()> does with undefined values. Setting
- the option to undef causes undefined values to be output as empty elements
- (rather than empty attributes), it also suppresses the generation of warnings
- about undefined values. Setting the option to a true value (eg: 1) causes
- undefined values to be skipped altogether on output.
- =head2 ValueAttr => [ names ] I<# in - handy>
- Use this option to deal elements which always have a single attribute and no
- content. Eg:
- <opt>
- <colour value="red" />
- <size value="XXL" />
- </opt>
- Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to:
- {
- colour => 'red',
- size => 'XXL'
- }
- instead of this (the default):
- {
- colour => { value => 'red' },
- size => { value => 'XXL' }
- }
- Note: This form of the ValueAttr option is not compatible with C<XMLout()> -
- since the attribute name is discarded at parse time, the original XML cannot be
- reconstructed.
- =head2 ValueAttr => { element => attribute, ... } I<# in+out - handy>
- This (preferred) form of the ValueAttr option requires you to specify both
- the element and the attribute names. This is not only safer, it also allows
- the original XML to be reconstructed by C<XMLout()>.
- Note: You probably don't want to use this option and the NoAttr option at the
- same time.
- =head2 Variables => { name => value } I<# in - handy>
- This option allows variables in the XML to be expanded when the file is read.
- (there is no facility for putting the variable names back if you regenerate
- XML using C<XMLout>).
- A 'variable' is any text of the form C<${name}> which occurs in an attribute
- value or in the text content of an element. If 'name' matches a key in the
- supplied hashref, C<${name}> will be replaced with the corresponding value from
- the hashref. If no matching key is found, the variable will not be replaced.
- Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are
- allowed).
- =head2 VarAttr => 'attr_name' I<# in - handy>
- In addition to the variables defined using C<Variables>, this option allows
- variables to be defined in the XML. A variable definition consists of an
- element with an attribute called 'attr_name' (the value of the C<VarAttr>
- option). The value of the attribute will be used as the variable name and the
- text content of the element will be used as the value. A variable defined in
- this way will override a variable defined using the C<Variables> option. For
- example:
- XMLin( '<opt>
- <dir name="prefix">/usr/local/apache</dir>
- <dir name="exec_prefix">${prefix}</dir>
- <dir name="bindir">${exec_prefix}/bin</dir>
- </opt>',
- VarAttr => 'name', ContentKey => '-content'
- );
- produces the following data structure:
- {
- dir => {
- prefix => '/usr/local/apache',
- exec_prefix => '/usr/local/apache',
- bindir => '/usr/local/apache/bin',
- }
- }
- =head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy>
- If you want the output from C<XMLout()> to start with the optional XML
- declaration, simply set the option to '1'. The default XML declaration is:
- <?xml version='1.0' standalone='yes'?>
- If you want some other string (for example to declare an encoding value), set
- the value of this option to the complete string you require.
- =head1 OPTIONAL OO INTERFACE
- The procedural interface is both simple and convenient however there are a
- couple of reasons why you might prefer to use the object oriented (OO)
- interface:
- =over 4
- =item *
- to define a set of default values which should be used on all subsequent calls
- to C<XMLin()> or C<XMLout()>
- =item *
- to override methods in B<XML::Simple> to provide customised behaviour
- =back
- The default values for the options described above are unlikely to suit
- everyone. The OO interface allows you to effectively override B<XML::Simple>'s
- defaults with your preferred values. It works like this:
- First create an XML::Simple parser object with your preferred defaults:
- my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1);
- then call C<XMLin()> or C<XMLout()> as a method of that object:
- my $ref = $xs->XMLin($xml);
- my $xml = $xs->XMLout($ref);
- You can also specify options when you make the method calls and these values
- will be merged with the values specified when the object was created. Values
- specified in a method call take precedence.
- Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be
- called as C<xml_in()> or C<xml_out()>. The method names are aliased so the
- only difference is the aesthetics.
- =head2 Parsing Methods
- You can explicitly call one of the following methods rather than rely on the
- C<xml_in()> method automatically determining whether the target to be parsed is
- a string, a file or a filehandle:
- =over 4
- =item parse_string(text)
- Works exactly like the C<xml_in()> method but assumes the first argument is
- a string of XML (or a reference to a scalar containing a string of XML).
- =item parse_file(filename)
- Works exactly like the C<xml_in()> method but assumes the first argument is
- the name of a file containing XML.
- =item parse_fh(file_handle)
- Works exactly like the C<xml_in()> method but assumes the first argument is
- a filehandle which can be read to get XML.
- =back
- =head2 Hook Methods
- You can make your own class which inherits from XML::Simple and overrides
- certain behaviours. The following methods may provide useful 'hooks' upon
- which to hang your modified behaviour. You may find other undocumented methods
- by examining the source, but those may be subject to change in future releases.
- =over 4
- =item handle_options(direction, name => value ...)
- This method will be called when one of the parsing methods or the C<XMLout()>
- method is called. The initial argument will be a string (either 'in' or 'out')
- and the remaining arguments will be name value pairs.
- =item default_config_file()
- Calculates and returns the name of the file which should be parsed if no
- filename is passed to C<XMLin()> (default: C<$0.xml>).
- =item build_simple_tree(filename, string)
- Called from C<XMLin()> or any of the parsing methods. Takes either a file name
- as the first argument or C<undef> followed by a 'string' as the second
- argument. Returns a simple tree data structure. You could override this
- method to apply your own transformations before the data structure is returned
- to the caller.
- =item new_hashref()
- When the 'simple tree' data structure is being built, this method will be
- called to create any required anonymous hashrefs.
- =item sorted_keys(name, hashref)
- Called when C<XMLout()> is translating a hashref to XML. This routine returns
- a list of hash keys in the order that the corresponding attributes/elements
- should appear in the output.
- =item escape_value(string)
- Called from C<XMLout()>, takes a string and returns a copy of the string with
- XML character escaping rules applied.
- =item numeric_escape(string)
- Called from C<escape_value()>, to handle non-ASCII characters (depending on the
- value of the NumericEscape option).
- =item copy_hash(hashref, extra_key => value, ...)
- Called from C<XMLout()>, when 'unfolding' a hash of hashes into an array of
- hashes. You might wish to override this method if you're using tied hashes and
- don't want them to get untied.
- =back
- =head2 Cache Methods
- XML::Simple implements three caching schemes ('storable', 'memshare' and
- 'memcopy'). You can implement a custom caching scheme by implementing
- two methods - one for reading from the cache and one for writing to it.
- For example, you might implement a new 'dbm' scheme that stores cached data
- structures using the L<MLDBM> module. First, you would add a
- C<cache_read_dbm()> method which accepted a filename for use as a lookup key
- and returned a data structure on success, or undef on failure. Then, you would
- implement a C<cache_read_dbm()> method which accepted a data structure and a
- filename.
- You would use this caching scheme by specifying the option:
- Cache => [ 'dbm' ]
- =head1 STRICT MODE
- If you import the B<XML::Simple> routines like this:
- use XML::Simple qw(:strict);
- the following common mistakes will be detected and treated as fatal errors
- =over 4
- =item *
- Failing to explicitly set the C<KeyAttr> option - if you can't be bothered
- reading about this option, turn it off with: KeyAttr => [ ]
- =item *
- Failing to explicitly set the C<ForceArray> option - if you can't be bothered
- reading about this option, set it to the safest mode with: ForceArray => 1
- =item *
- Setting ForceArray to an array, but failing to list all the elements from the
- KeyAttr hash.
- =item *
- Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains
- one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested
- element). Note: if strict mode is not set but -w is, this condition triggers a
- warning.
- =item *
- Data error - as above, but non-unique values are present in the key attribute
- (eg: more than one E<lt>partE<gt> element with the same partnum). This will
- also trigger a warning if strict mode is not enabled.
- =item *
- Data error - as above, but value of key attribute (eg: partnum) is not a
- scalar string (due to nested elements etc). This will also trigger a warning
- if strict mode is not enabled.
- =back
- =head1 SAX SUPPORT
- From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API
- for XML) - specifically SAX2.
- In a typical SAX application, an XML parser (or SAX 'driver') module generates
- SAX events (start of element, character data, end of element, etc) as it parses
- an XML document and a 'handler' module processes the events to extract the
- required data. This simple model allows for some interesting and powerful
- possibilities:
- =over 4
- =item *
- Applications written to the SAX API can extract data from huge XML documents
- without the memory overheads of a DOM or tree API.
- =item *
- The SAX API allows for plug and play interchange of parser modules without
- having to change your code to fit a new module's API. A number of SAX parsers
- are available with capabilities ranging from extreme portability to blazing
- performance.
- =item *
- A SAX 'filter' module can implement both a handler interface for receiving
- data and a generator interface for passing modified data on to a downstream
- handler. Filters can be chained together in 'pipelines'.
- =item *
- One filter module might split a data stream to direct data to two or more
- downstream handlers.
- =item *
- Generating SAX events is not the exclusive preserve of XML parsing modules.
- For example, a module might extract data from a relational database using DBI
- and pass it on to a SAX pipeline for filtering and formatting.
- =back
- B<XML::Simple> can operate at either end of a SAX pipeline. For example,
- you can take a data structure in the form of a hashref and pass it into a
- SAX pipeline using the 'Handler' option on C<XMLout()>:
- use XML::Simple;
- use Some::SAX::Filter;
- use XML::SAX::Writer;
- my $ref = {
- .... # your data here
- };
- my $writer = XML::SAX::Writer->new();
- my $filter = Some::SAX::Filter->new(Handler => $writer);
- my $simple = XML::Simple->new(Handler => $filter);
- $simple->XMLout($ref);
- You can also put B<XML::Simple> at the opposite end of the pipeline to take
- advantage of the simple 'tree' data structure once the relevant data has been
- isolated through filtering:
- use XML::SAX;
- use Some::SAX::Filter;
- use XML::Simple;
- my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']);
- my $filter = Some::SAX::Filter->new(Handler => $simple);
- my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
- my $ref = $parser->parse_uri('some_huge_file.xml');
- print $ref->{part}->{'555-1234'};
- You can build a filter by using an XML::Simple object as a handler and setting
- its DataHandler option to point to a routine which takes the resulting tree,
- modifies it and sends it off as SAX events to a downstream handler:
- my $writer = XML::SAX::Writer->new();
- my $filter = XML::Simple->new(
- DataHandler => sub {
- my $simple = shift;
- my $data = shift;
- # Modify $data here
- $simple->XMLout($data, Handler => $writer);
- }
- );
- my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
- $parser->parse_uri($filename);
- I<Note: In this last example, the 'Handler' option was specified in the call to
- C<XMLout()> but it could also have been specified in the constructor>.
- =head1 ENVIRONMENT
- If you don't care which parser module B<XML::Simple> uses then skip this
- section entirely (it looks more complicated than it really is).
- B<XML::Simple> will default to using a B<SAX> parser if one is available or
- B<XML::Parser> if SAX is not available.
- You can dictate which parser module is used by setting either the environment
- variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable
- $XML::Simple::PREFERRED_PARSER to contain the module name. The following rules
- are used:
- =over 4
- =item *
- The package variable takes precedence over the environment variable if both are defined. To force B<XML::Simple> to ignore the environment settings and use
- its default rules, you can set the package variable to an empty string.
- =item *
- If the 'preferred parser' is set to the string 'XML::Parser', then
- L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not
- installed).
- =item *
- If the 'preferred parser' is set to some other value, then it is assumed to be
- the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory.>
- If L<XML::SAX> is not installed, or the requested parser module is not
- installed, then C<XMLin()> will die.
- =item *
- If the 'preferred parser' is not defined at all (the normal default
- state), an attempt will be made to load L<XML::SAX>. If L<XML::SAX> is
- installed, then a parser module will be selected according to
- L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX
- parser installed).
- =item *
- if the 'preferred parser' is not defined and B<XML::SAX> is not
- installed, then B<XML::Parser> will be used. C<XMLin()> will die if
- L<XML::Parser> is not installed.
- =back
- Note: The B<XML::SAX> distribution includes an XML parser written entirely in
- Perl. It is very portable but it is not very fast. You should consider
- installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your
- platform.
- =head1 ERROR HANDLING
- The XML standard is very clear on the issue of non-compliant documents. An
- error in parsing any single element (for example a missing end tag) must cause
- the whole document to be rejected. B<XML::Simple> will die with an appropriate
- message if it encounters a parsing error.
- If dying is not appropriate for your application, you should arrange to call
- C<XMLin()> in an eval block and look for errors in $@. eg:
- my $config = eval { XMLin() };
- PopUpMessage($@) if($@);
- Note, there is a common misconception that use of B<eval> will significantly
- slow down a script. While that may be true when the code being eval'd is in a
- string, it is not true of code like the sample above.
- =head1 EXAMPLES
- When C<XMLin()> reads the following very simple piece of XML:
- <opt username="testuser" password="frodo"></opt>
- it returns the following data structure:
- {
- 'username' => 'testuser',
- 'password' => 'frodo'
- }
- The identical result could have been produced with this alternative XML:
- <opt username="testuser" password="frodo" />
- Or this (although see 'ForceArray' option for variations):
- <opt>
- <username>testuser</username>
- <password>frodo</password>
- </opt>
- Repeated nested elements are represented as anonymous arrays:
- <opt>
- <person firstname="Joe" lastname="Smith">
- <email>joe@smith.com</email>
- <email>jsmith@yahoo.com</email>
- </person>
- <person firstname="Bob" lastname="Smith">
- <email>bob@smith.com</email>
- </person>
- </opt>
- {
- 'person' => [
- {
- 'email' => [
- 'joe@smith.com',
- 'jsmith@yahoo.com'
- ],
- 'firstname' => 'Joe',
- 'lastname' => 'Smith'
- },
- {
- 'email' => 'bob@smith.com',
- 'firstname' => 'Bob',
- 'lastname' => 'Smith'
- }
- ]
- }
- Nested elements with a recognised key attribute are transformed (folded) from
- an array into a hash keyed on the value of that attribute (see the C<KeyAttr>
- option):
- <opt>
- <person key="jsmith" firstname="Joe" lastname="Smith" />
- <person key="tsmith" firstname="Tom" lastname="Smith" />
- <person key="jbloggs" firstname="Joe" lastname="Bloggs" />
- </opt>
- {
- 'person' => {
- 'jbloggs' => {
- 'firstname' => 'Joe',
- 'lastname' => 'Bloggs'
- },
- 'tsmith' => {
- 'firstname' => 'Tom',
- 'lastname' => 'Smith'
- },
- 'jsmith' => {
- 'firstname' => 'Joe',
- 'lastname' => 'Smith'
- }
- }
- }
- The <anon> tag can be used to form anonymous arrays:
- <opt>
- <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head>
- <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data>
- <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data>
- <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data>
- </opt>
- {
- 'head' => [
- [ 'Col 1', 'Col 2', 'Col 3' ]
- ],
- 'data' => [
- [ 'R1C1', 'R1C2', 'R1C3' ],
- [ 'R2C1', 'R2C2', 'R2C3' ],
- [ 'R3C1', 'R3C2', 'R3C3' ]
- ]
- }
- Anonymous arrays can be nested to arbirtrary levels and as a special case, if
- the surrounding tags for an XML document contain only an anonymous array the
- arrayref will be returned directly rather than the usual hashref:
- <opt>
- <anon><anon>Col 1</anon><anon>Col 2</anon></anon>
- <anon><anon>R1C1</anon><anon>R1C2</anon></anon>
- <anon><anon>R2C1</anon><anon>R2C2</anon></anon>
- </opt>
- [
- [ 'Col 1', 'Col 2' ],
- [ 'R1C1', 'R1C2' ],
- [ 'R2C1', 'R2C2' ]
- ]
- Elements which only contain text content will simply be represented as a
- scalar. Where an element has both attributes and text content, the element
- will be represented as a hashref with the text content in the 'content' key
- (see the C<ContentKey> option):
- <opt>
- <one>first</one>
- <two attr="value">second</two>
- </opt>
- {
- 'one' => 'first',
- 'two' => { 'attr' => 'value', 'content' => 'second' }
- }
- Mixed content (elements which contain both text content and nested elements)
- will be not be represented in a useful way - element order and significant
- whitespace will be lost. If you need to work with mixed content, then
- XML::Simple is not the right tool for your job - check out the next section.
- =head1 WHERE TO FROM HERE?
- B<XML::Simple> is able to present a simple API because it makes some
- assumptions on your behalf. These include:
- =over 4
- =item *
- You're not interested in text content consisting only of whitespace
- =item *
- You don't mind that when things get slurped into a hash the order is lost
- =item *
- You don't want fine-grained control of the formatting of generated XML
- =item *
- You would never use a hash key that was not a legal XML element name
- =item *
- You don't need help converting between different encodings
- =back
- In a serious XML project, you'll probably outgrow these assumptions fairly
- quickly. This section of the document used to offer some advice on chosing a
- more powerful option. That advice has now grown into the 'Perl-XML FAQ'
- document which you can find at: L<http://perl-xml.sourceforge.net/faq/>
- The advice in the FAQ boils down to a quick explanation of tree versus
- event based parsers and then recommends:
- For event based parsing, use SAX (do not set out to write any new code for
- XML::Parser's handler API - it is obselete).
- For tree-based parsing, you could choose between the 'Perlish' approach of
- L<XML::Twig> and more standards based DOM implementations - preferably one with
- XPath support.
- =head1 SEE ALSO
- B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>.
- To generate documents with namespaces, L<XML::NamespaceSupport> is required.
- The optional caching functions require L<Storable>.
- Answers to Frequently Asked Questions about XML::Simple are bundled with this
- distribution as: L<XML::Simple::FAQ>
- =head1 COPYRIGHT
- Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt>
- This library is free software; you can redistribute it and/or modify it
- under the same terms as Perl itself.
- =cut
|