perl & xml Библиотека Программиста
TRANSCRIPT
-
7/25/2019 Perl & XML
1/208
-
7/25/2019 Perl & XML
2/208
Perl & XML
Jason McIntosh & Erik T. Ray
O REILLY 5
Beijing Cambridge Farnham Köln Paris Sebastopol Taipei Tokyo
-
7/25/2019 Perl & XML
3/208
P e r l & X M L
.
2 0 0 3
-
7/25/2019 Perl & XML
4/208
32.973 018 681.3.0696
96 Perl & XML. / . , . . .: ,2003. 208 : .
ISBN 5 94723 482 3 XML
Perl. XML , XML , (DOM), , Perl . . , Perl.
32.973 018 681.3.06
, , , . , , , .
2002 O'Reilly & Associates, Inc.ISBN 0 596 00205 (.) , , 2003ISBN 5 94723 482 3 , ,
, 2003
-
7/25/2019 Perl & XML
5/208
10
1. Perl XML 14
2. 1_ 25
3. XML: 49
4. 85
5. SAX 97
. 127
7. (DOM) 140
8. : XPath, XSLT
154
9. RSS, SOAP XML 170
10. 186
205
-
7/25/2019 Perl & XML
6/208
10 11 11 12 12 13
1. Perl XML 14 Perl XML? 14XML , 15 XML 19 20 21
21 XML 21 XML 22
XML 22 23 23 23 23 23 24
2. L 25 XML: 26, 30
32 33 34 , Unicode 37 XML 38
-
7/25/2019 Perl & XML
7/208
7
38XML : 40
41 43
45 45
3. XML: 49 XML 50
( ): 53
XML::Parser 57: , 59 61
: 63 65 XML::LibXML 68 XML::XPath 70 72
DTD 72 74 XML::Writer 75
, 78 79
Unicode, Perl XML 79 Unicode 80 81 82
4. 85 85 86 88 88
XML::PYX 89 XML::Parser 91
5. SAX 97 SAX 98DTD 103 106 , XML 108
-
7/25/2019 Perl & XML
8/208
8
110
XML::Handler::YAWriter 112
XML::SAX XML::SAX::ParserFactory 114 SAX2 116 SAX2 121 122 125
6. 127 XML 127
XML::Simple 129
XML::Parser 131 XML::SimpleObject 133
XML::TreeBuilder 135
XML::Grove 137
7. (DOM) 140DOM Perl 140
DOM 141 Document 141 DocumentFragment 142 DocumentType 142 Node 143 NodeList 144 NamedNodeMap 144 CharacterData 145 Element 145 Attr 146 Text 147 CDATASection 147
ProcessingInstruction 147 Comment 147 EntityReference 147 Entity 148 Notation 148
XML::DOM 148 XML::LibXML 151
8. : XPath, XSLT 154
154
XPath 157
XSLT 164
167
-
7/25/2019 Perl & XML
9/208
9
9. RSS, SOAP XML 170 XML 170
XML::RSS 171 RSS 172 XML::RSS 172 176: 177 179
XML 179 XML::Generator::DBI 180 DBI SAX 181
SOAP::Lite 182
: 183 : ISBN 184
10. 186 Perl XML 186 189
XML::ComicsML 190 XSLT: XML HTML 194
: Apache::DocBook 196
202
205
-
7/25/2019 Perl & XML
10/208
, , Web . XML , . , . Perl XML ( , Perl web
). XML , HTML,
, SGML. i . , , , web
. , XML ori , .
, SOAP XML RPC. , Unicode. ASCII. , . , .
, Perl , , Perl XML . : ? .
-
7/25/2019 Perl & XML
11/208
11
,
Perl XML . , Perl. . Perl 1.
XML. , , HTML. . H T M L 2 .
, Perl (CPAN, Comprehensive Perl Archive Network),
CPAN. , Perl XML. , .
.
1 . .
2 , XML. , . XML, ( , ).
3 XML. , , , .
4 , XML.
5 Simple API, XML (SAX), .
6 , XML . .
7 (Document Object Model,D O M ) . , XML D O M .
8 , .
9 , Perl XML, .
1 . Perl. .: , 2001.2
. HTML. .: , 2001.
-
7/25/2019 Perl & XML
12/208
12
10 . , , , .
, Perl XML, , , . , .
perl xml Perl XML, , . , web http://aspn.activestate.com/ASPN/Mail/Browse/Threaded/perl xml.
web http://www.xmlperl.com, , Perl/XML.
CPAN , , Perl, CPAN. Perl, , , .
. , web http://www.cpan.org. (FAQ), . , Perl ( ,
Perl ).
(Paula Ferguson)
. (Andy Oram), (JonOrwant), (Michel Rodriguez), (SimonSt.Laurent), (Matt Sergeant), ( Sterin), (Mike Stok), (Nat Torkington), ,
(Linda Mui). ; (, , , ,
, , , , , , , , , , , , , , , , , , ); (Derrick Arnelle), (Stacy
Chandler), . . (J. D. Curran), Cape (Sarah Demb), (Ryan Frasier), (Chris Gernon), CJohn Grigsby), (Andy Grosser), (Lisa Musiker), (Benn Salter), (Caroline Senay), (Greg Travis), (Barbara
Young); : , , , .
-
7/25/2019 Perl & XML
13/208
13
, ; Looney Labs (http://www.looneylabs.com), (Boston Warren), ; , ; Diesel Cafe ( ) 1369 Coffee House , ; , : The Cat; Apple Computer iBook Mac OS X, ; , (Larry Wall) , Perl.
, [email protected] ( , ).
! , , http://
www.piter.com/download. web http://www.piter.com
.
-
7/25/2019 Perl & XML
14/208
Perl XML
Perl . Perl, XML , . web , , web ,
, . , . , .
Perl XML? , Perl
. , , , . , , , Perl, , Perl
. A XML, , , Per l XML .
, 5.6, Perl , Unicode (, UTF 8), XML . 3.
, Perl(CPAN), Perl , . ; , Perl, . . , (parser), CPAN
-
7/25/2019 Perl & XML
15/208
XML , 15
, ?, , . CPAN : , . , PAN . XML,
. XML .
Perl. , . SAX, , .
, Perl, , XML. XML , . XML . , , , . , XML , , , . . , XML . , , , , . , .
, Perl Web. , Java JavaScript , , web , , Perl web . web , Perl, XML. , web Perl, XML.
, . Perl XML , . .
XML , XML
, , , . , , . , , DTD, .
: , XML ,
-
7/25/2019 Perl & XML
16/208
16 1 Perl XML
. , . API, . XML ,
, . XML , XML .
, XML : : Simple, (GrantMcLean). ,
XML . , XML , , . XML : : Simple . XML .
. , .
. , use
XML: : Simple :use XML::Simple;
XML : : Simple :
XML i n () XML , .
, . XMLou t () ,
, XML .
, . . .
, WarbleSoft SpamChucker, . /
XML , . , , . , XML .
XML . 1.1.
-
7/25/2019 Perl & XML
17/208
XML , 17
1.1. SpamChucker
JoeWrigley
17 Beable Ave . MeatballMI82649
HenriettaPussycat
R.F.D. 2FlangervilleNY83642
perldoc, XML : : Simple, , 1.2.
1.2. ,
# # XML ,
# WarbleSoft SpamChucker..# , # ,use strict;use warnings;
# XML::Simple.use XML::Simple;
# # "XMLin" XML::Simple.
# 'forcearray',
# # .my $cust_xml = XMLin('./customers.xml', forcearray=>1);
# customer,# # 'customer'.for my $customer (@{$cust_xml->{customer}}) {
-
7/25/2019 Perl & XML
18/208
18 1 Perl XML
# #'first name' 'surname' # Perl, uc().
foreach (qw(first-name surname)) {$customer->{$_}->[0] = uc($customer->{$_}->[0]);
# XML ,# # ( ) .print XMLout($cust_xml);print "\n";
(, , ), :
MI82649Meatball17 Beable Ave.
JOEi like [email protected]
< s u r n e IG l_ E Y / s r n e >42
NY83642FlangervilleR.F.O. 2
HENRIET Ai [email protected]
! , XML , , , . . , .
, . . ?
: . , , , . . , XML : : Simple,
-
7/25/2019 Perl & XML
19/208
XML 19
. , . , XML: : S i mpl e,
. , , . ,
SpamChucker, . . , , 1. ,
XML Perl! , .
, XML . , , , . , , XML Perl.
XML
, XML, . XML Perl.
XML ( , , ). . , . , .
, XML , , . , , ,
XML , . XML , , , , , .
Perl Perl : , , XML , use. ,
' , , , , .
, XML. , , , .
-
7/25/2019 Perl & XML
20/208
20 1 Perl XML
. XML Perl,
, XML: : Parser ( ). XML , . , , .
Perl , . Perl , , . CPAN. , Perl, , , CPAN Perl .
, , , XML, . XML CPAN
Perl , . , ,
. . ,
1998 , . . Perl/XML ( perl xml, tiveState). . , XML. , SAX D O M , XML , XPath. . ( XML: : SAX), DWIM Perl, 1.
, , , . XML: : Simple. ,
. XML .
1 DWIM Do What I Mean ( , ), , Perl.
-
7/25/2019 Perl & XML
21/208
21
, XML , CPAN, 90%. , 10% . , , , Perl XML ( , Perl ). , .
, XML , XML , , . , . XML , .
, . , , XML RPC, TCP, XML ! , , , XML , XML, .
XML , XML
: , , .. XML , XML . , . Perl, XML , . XML .
, XML, , . ,
, . , , ( , ), .
-
7/25/2019 Perl & XML
22/208
22 1 Perl XML
XML XML XML
. , XML , . : , . XML ,
. . DTD . ,
, . . , , , , ( ) . , , XML.
, XML , / . , :
. . Perl: ;
XML . , , Perl. XML : : Simple ., ;
, XML. , XML ( HTML ), . XML : XML .
, . L ripo .
XML , . , XML .
-
7/25/2019 Perl & XML
23/208
XML 23
XML .
, XML . , , , .
XXI , . , web
ASCII. Unicode, , . XML Unicode, , Perl Unicode, UTF 8. ,
, .
, . , , . . HTML ,
XSLT , . , .
, , . . ,
DTD, , . , .
: , , . ,
, ,
. , . ,
. , . .
-
7/25/2019 Perl & XML
24/208
24 1 Perl XML
, XML, , ,
. . , , . XML , , , . , .
Perl XML, , .
, , , Perl/XML .
-
7/25/2019 Perl & XML
25/208
XML
XML , ,
. ,
SGML, , . XML .
XML , . , . , (Document TypeDescription, DTD).
XML :, , , CDATA. , . , , xml: space
. , , .
. XML , , DTD. . , , . XSLT XML .
-
7/25/2019 Perl & XML
26/208
26 2 XML
, , .
"! XML,
, .
XML: , (). .
troff.
Unix. , .
, troff, . , . , , \ f I mini . . .
troff , . , ,
.vs 18 , 18 . , . ,
, . . , .
, troff , . !) , . , . , . , , , , .
, , . , , , . , . , .
-
7/25/2019 Perl & XML
27/208
XML: 27
, .
, troff,
. , . , . troff , , . Croft . , . , ,
( ).
troff . , . ? , , ? 60 . . . , .
(Graphic Comm unications Association, GC A). GenCode. , , . IBM (Generalized Markup Language, GML). (Charles Goldfarb), (Edward Mosher) (Raymond Lorie)1. IBM , , . , .
(American National Standards Institute, ANSI), GML. G M L n GenCode ANSI (Standard Generalized Markup Language, SGML). (U.S Department of
Defense) (Internal Revenue Service). . 1986 ISO .
1 , ( G M L )
-
7/25/2019 Perl & XML
28/208
28 2 XML
, .
, , . , , :
< f i r s t>Ri t a< / f i r s t>Book
1969423
. : , . (4/23/1969) (23/4/1969) . .
, . , SGML
, . , . , SGML. SGML , . , SGML .
: HTML? H e , HTML SGML?. HTML, , , SGML.
, SGML. SGML , , . SGML , ,
, IRS . , HTML
. , . troff, DocBook SGML . , HTML
-
7/25/2019 Perl & XML
29/208
XML: 29
, . HTML , SGML. HTML
, .
, SGML HTML. (Extensible Markup Language, XML). 'X' extensible (). HTML. , XML HTML .
, , . XML SGML.
, XML . : XMLRPC, XHTML, SVG DocBook XML. XML , XSL ( ), XSLT ( ), XPath ( ) XLink ( ). (World Wide Web Consortium, W3C). Microsoft, Sun, IBM, .
W3 C , . , , web http://www.w3.org/. W3 C , . , , 1.
, . , W3C, XML Schema. 3. ,
. , W3C .
, XML , web . XML.
1 , W3C, , ;
. , .
-
7/25/2019 Perl & XML
30/208
30 2 XML
, . . , troff, TeX HTML, , (,
). , , , , . , . , , , .
XML. ] . (, ). . 11 .
XML. XML . , , . , , , . XML . , ( , , ). ( , , ).
.)' ( ), . , , ( ). , , , . .
2.1 XML .
2.1 . XML < l i s t i d = " e r i k s t o d o 4 7 " >
Things to Do This Weekclean the aquariummow the lawnsave the whales
, , , , . , HTML , . , (""), .
-
7/25/2019 Perl & XML
31/208
, 31
. , . ( , priori ty = "important"). ,
. ,
. XML , XML XML, . , , . , , .
HTML
( SGML , XML)'. , , , , . , , , . , , , . , , , HTML . XML
. ,
, . XML , . . ( ) . , , . , , , . (), . ( ).
. 2.1.
XML. ( ) . , , . , < 1 i s t >,
. , , .
1 XML HTML XHTML. , HTML,
XML . XML (, ). web HTML . XML , DocHook MathML.
-
7/25/2019 Perl & XML
32/208
32 2 XML
[title] [item]*3e*Sfc>** ~~> S i '4* [ i t e m ]
"eriks todo 47" "Things to Do "clean the aquariunYhow the lawn"This Week"
\ " s a v e t h e w h a l e s "
" i m p o r t a n t "
. 2 .1 . ,
,
. , . , , . 2.2.
2.2. ,
Fish and Bicycles: A Connection?I have found a surprising relationshipof fish to bicycles, expressed by the equationf = kb+n. The graph below illustratesthe data curve of my experiment:
fish8099l
bicycleQ100050
fish=0.01*bicycle+81.4
. , . , graph:, chartml ( , ). graph: . .
-
7/25/2019 Perl & XML
33/208
33
, . , , . ,
, . ,
. xm\ns:nperjmKc=URL. ( gr aph:) . URL , URL . .
, . XML. , , , ,
, (, ). ( XSLT).
, . ,
, . DTD . , , ( , ). XML . , XML , DTD ( SGML). DTD.
, .
10 , .
, , , , .
, XML . . XML , , .
-
7/25/2019 Perl & XML
34/208
34 2 XML
, XML . , , 1. DTD ,
, . . , .
lie , XML , . xml: space . , 2. :
246 Marshmellow Ave.Slumbervilie, MA02149
, , . xml: space "defaul t" . XML , .
XML , . , , . XML 3, . XML . , , . .
(Document Type Declara
tion, DTD). . , . ( DTD, , , DTD .) , , . 2.3.
1 XML , . ,
XML . 3.2 xml .
XML .1 ,
. .
-
7/25/2019 Perl & XML
35/208
35
2.3. , < IDOCTYPE memo
SYSTEM "/xml dtds/memo.dtd"[
]>
All Oompa loompas
&companyname; has a new owner and CEO, Charlie Bucket. Sinceour name, &companyname;, has considerable brand recognition,the board has decided not to change i t . However, at Cha rl ie 'srequest, we will be changing our healthcare provider to themore comprehensive mpacare, which has better facilitiesfo r 'Loompas (text of the plan to follow). Thank you for workingat &companyname;
&heal thp lan ;
. DTD, , , .
( ), , , DOCTYPE.
, . . , SYSTEM "/xmldtds/memo.dtd", , ([ ]).
, . , . , . , , .
. : . companyname healthplan.
. , . , , . . , . , . U R L , web . XML
, .
, ;. (&) , ,
-
7/25/2019 Perl & XML
36/208
36 2 XML
. ,
( companyname).
( ), hea l thp lan ( , , , , , ). , , . , . , XML . XML , XSLT, ,
. . , , , . ' U ' : 0. , , , , . , ( XML ), . DC. GxOODC
220, U Unicode ( , XML, ).
, Uuml, , 00DC, XML DTD :
< ENTITY Uuml :>
XML , . 2.1.
, , XML.
2.1 XML
< >& &" "
'
XML : < & ;. . , ,
-
7/25/2019 Perl & XML
37/208
, Unicode 37
XML . , .
, Unicode , . , ( ), . US ASCII. 128 , ,
, , , , . 7 8 . . ISO Latinl, Unix . ISO Lalinl , , , , , , , .
, 8 .
, , Unicode. ,
. , Unicode 32 . . Unicode Consortium , , , .
, , ., , , , XML
. Unicode, , UTF 8. , , Unicode,
, ( , 255). , 1 , ISO Latin 1 ( , 0 255).
, , , , (, ). UTF 8. , Unicode,
-
7/25/2019 Perl & XML
38/208
38 2 XML. . 5.6, Perl UTF 8. 3.
XML , XML . ,
XML , , . , XML, , , DTD.
:
, ( version). , , , , UTT 8 ( ). , "yes" , , .
, XML . Ke(processi ng instructions, PI)
XML . , . , PI , . , :
The very long titlethat seemed to go on forever and ever PI file breaker,
chap04.xml. , ,
, . XML .
1. XML ,
-
7/25/2019 Perl & XML
39/208
39
. . 1 . ,
. , . PI . , . xm 2pdf, PI.
, 1 . .
, , , , ,
.
Perl (Plain Old Documentation, ) 1, PI POD . =f or =begi n /=end. POD , ( ).
XML . , , XML . ( , ).
. , . :
< ! >This is p er f e ct l y v i si b l e XML content .
XML HTML .
. . , , " " .
CDATA. , XML . , C D ATA . , , (, &), .
1 . Per]. .: , 2001.
-
7/25/2019 Perl & XML
40/208
40 2 XML
: 3 && @lines ) {
Sinput = ;}]]>, < ! [CDATA[ ] ] >,
, . CDATA , , XML . , , 1.
L : SGML DTD.
, , . , SGML , . HTML SGML,
. HTML HTML DTD. web .
XML , XML.
. , . , DTD , XML . .
XML , , . , HTML .
, , , , .
, , (. ).
CDATA XML DocBook, . , is XML , , < &.
-
7/25/2019 Perl & XML
41/208
41
? :
,
, , , . XML ;
;
, (, , , ), .
, ;
, . ;
; , (< >) (&),
, . CDATA;
, , . (/) (>) .
, W3C, web http://www.w3.org/XML.
XML , , ]' . , , . L , . , , . 3.
( , ), DTD . ,
-
7/25/2019 Perl & XML
42/208
42 2 XML
. DTD , , .
DTD , . , . , . :
< ELEMENT sandwich ((meat | cheese)+ | (peanut butter, j e l l y ) ) ,condiment+, pickle?)>< ELEMENT condiment (PCDATA | mustard | ketchup )*> .
( , , , EMPTY). . , , . , , DTD.
. . , . :
: ,
. . , i d, .
, . ( #REQUIRED). , price, CDATA. #IMPLIED. , t a s t e , "yummy", ( ). , name ( , ' BLT ' ).
. . , , , D ( !). , . , , ,, .
-
7/25/2019 Perl & XML
43/208
43
, XML. DTD .
, , . , DTD ] ] . , DTD , , .
, DTD , . W3C XML Schema. , , . , . , CPAN , .
DTD, XML Schema XML . , , , . ,
, . 2.4 , . .
2.4. XML
-
7/25/2019 Perl & XML
44/208
44 2 XML
, XML Schema. , , . .
, . ,
. "census", CensusType. , DTD, , , . name="CensusType". , , . date.
da te
, : . , , , . DTD.
. (), , , , ; ; , URL ; , , DTD; .
, (facet), . , , , , , 200,
max in e lusive. XML Schema ,, scale, encoding, pattern, enumerationnmax length.
-
7/25/2019 Perl & XML
45/208
45
Address : , . , po sta lcode : [A Z] d{3}. : , . , .
, XML, , ( ). , .
W3C, XML Schema , . , , RelaxNG ( web http://www.oasis open.org/committees/relax ng/) Schematron (http://www.ascc.net/xml/resource/schematron/schematron.html). . Perl, . 3.
. XML . W3C XML, ,
XML (XSLT, XML Stylesheet Language for
Transformations). . XML Schema, XSLT XML . , , . . ,
. : , XSLT .
2.5 ,
XML , DocBook, HTML . 2.5. , XSLT
-
7/25/2019 Perl & XML
46/208
4 6 2 XML
< ! BOOK >< x s l : t e m p l a t e match="book">
< h t m l >< t i t l e > < x s l : v a l u e o f s e l e c t = " t i t l e " / > < / t i t l e >
< h l > < x s l : v a l u e o f s e l e c t = " t i t l e " / > < / h l >Tab le of Contents< x s l : c a l l t e m p l a t e n a m e = " t o c " / >< x s l : a p p l y t e m p l a t e s se le ct = " chapte r " />
< / h t m l >
< / x s l : t e m p l a t e ?
< ! CHAPTER >< x s l : t e m p l a t e m a t c h = " c h a p t e r " >
< x s l : a p p l y t e m p l a t e s / >< / x s l : t e m p l a t e >
< ! CHAPTER TITLE >< x s l : t e m p l a t e m a t c h = " c h a p t e r / t i t l e " >
< x s l : t e x t> Ch a pt e r < / x s l : t e x t >
< x s l : a p p l y t e m p i a t e s / >
< / x s l : t e m p l a t e >
< ! PARA >< x s l : t e m p l a t e match="para">
< p > < x s l : a p p l y t e m p i a t e s / > < / p >< / x s l : t e m p l a t e >
< ! : >< x s l : t e m p l a t e name="toc">
< x s l : i f t e s t = " c o u n t ( c h a p t e r ) > 0 " >< x s l : f o r e a c h s e l e c t = " c h a p t e r " >
< x s l : t e x t >C h ap t e r < / x s l : t e x t >< x s l : v a l u e o f s e l e c t = " p o s i t i o n ( ) " />< x s l : t e x t > : < / x s l : t e x t >< i > < x s l : va lue o f se le c t = " t i t l e " / > < / i >
< / x s l : f o r e a c h >< / x s 1 : i f >
< / x s l : t e m p l a t e >
< / x s l : s t y l e s h e e t >
XSLT . XML ( ) . ,
, , . XSLT . , , , .
-
7/25/2019 Perl & XML
47/208
47
2.6 , .
2.6.
< t i t l e > T h e B l a t h e r i n g B r a i n s < / t i t l e >
< t i t l e > A t t h e B a z a a r < / t i t l e > What a fantastic day it was. The crates were stacked
high with imported goods: dates, bananas, dried meats,f ine si lks, and more things than I could imagine. As I wa lke d around, s avo rin g the f rag ranc es of cinnamon andcardamom, I almost didn't notice a small booth with al i t t l e man s e l l i n g b r a i n s . < / p a r a >
< p a r a > B r a i n s Yes, human b r a i n s , s t i l l q u i t e mois t and s qu i shy ,swimming in big glas s ja rs f u l l of some gre en ishf l u i d . < / p a r a >
"Would you like a brain, sir?" he asked. "Veryreasonable prices. Here is Enrico Fermi 's brain foronly two d racmas . Or, perhaps, you would preferOr the great emperor Akhnaten?"
I rec oi le d in horro r . . . < /pa ra>
.
1. . ,
book. . , , s i : namespace.
2. XSLT , ,
, . , . .
3. , < xs l : c a l l t e m p l a t e name="toc"/>. , , < x s l : t e m p l a t e name=" toc">. . .
4. . ,
, (). .
5. ,
-
7/25/2019 Perl & XML
48/208
48 2 XIML
. foreachQ Perl. < c h a p t e r> , . "Chapter I " , "Chapter 2" . .
6. " t o e " . . ,
, . se l e c t = " chap te r" , . , .
7. . , < t i t l e > .
XSLT , . , . , . , Perl. , XSLT Perl. , , XSLT Perl.
XML. XML Perl . XML, , , . , XML
-
7/25/2019 Perl & XML
49/208
XML:
, L : , . , XM.L , , . , Perl , . XML
, ( , 2), . , , , XML. . 1.
. ,
. , , . , . .
, . ? ? ? ? , .
, , XML . ,
.
-
7/25/2019 Perl & XML
50/208
50 3 XML:
XML / ,
: , . . , ,
( , ). , , . XML, , , . , , XML , . , 1.
, ? , 'L , XML
. . XML , , . , . (, ). (, ). , , .
.
XML (
XML ) , . Perl , , , , . XML , . , . XML , . : , , , , ,
1 (Douglas Adams Hitchhiker's Guidelo the Galaxy) , , .
-
7/25/2019 Perl & XML
51/208
XML 51
. . , XML :
; ;
; , ( )
; . XML ,
.
: , , . (, , ). , , L .
, . . , XML
. . , DTD . URL . , ,
, , .
. XML , . ( , , URL ), . , .
. , , , XML .
, . , .
. XML
-
7/25/2019 Perl & XML
52/208
52 3 XML:
. , , , . , XML . < decree > , . 'now" , . .
-
7/25/2019 Perl & XML
53/208
XML 53
. , . :
:
, , ;
, XML. , , , ;
. ,
, , .
( ):
XML , .
, !'] . , . , .
. , XML . , Perl ( ) , , Perl ,
. XML, .
, , , ! Perl XML, , . , ( ). , XML Perl .
, XML , . , , ..
-
7/25/2019 Perl & XML
54/208
-
7/25/2019 Perl & XML
55/208
XML 55
if( Stext =~ /\]\]>/ ) {$text = $';
} else {return;
# .} elsif( $text =~ m| A&\?$ident\s*[ \?]+\?> | ) {
Stext = $' ;
# # ( , ).} elsif( $text =~ m| A\s+| ) {
Stext = $' ;
# .} elsif( $text =~ /( [ &&>]+)/ ) {
my $data = $1;# ,return if( $data =~ /\S/ and not( elements )) ;Stext = $';
# .} elsif( Stext =~ / A&Sident;+/ ) {
Stext = $' ;
# .} else {
return;
return if( elements ); # return 1;
}
Perl ( ) 1. , . , push () POP () . , (last in, first out, L I F O ) . , , . ,
, . , , , . ,
( ) .
, XML , . , . , , ,
1 . Perl ( ).
-
7/25/2019 Perl & XML
56/208
56 3 XML:
... . XML .
, ., , . . , , ; , , ,
. , , :
( : "1 2f , " " " " . . " ) ;
, , : ;
; (/)
; (&)
(
-
7/25/2019 Perl & XML
57/208
XML::Parser 57
, DTD, , .
, , .
, . , . ? , , . , . , .
, ( , , ). , . , :
. , ;
DTD, ;
. .
, . , !
, XML , . , , , . ,
, , .
XML::Parser . , , .
, . , XML . , Perl XML,
-
7/25/2019 Perl & XML
58/208
58 3 XML:
, .
Perl
? : Perl (Comprehensive Perl Archive Network,CPAN). Perl , . CPAN, .
, Perl XML, .
CPAN , , XML . , , .
XML , RSS SOAP. . , , . XML , . .
XML ., ,
XML . , , . , ,
. , . , Perl
, SAX D O M . XML : .'Parser XML ,
Perl. ,
. , , , , . , L : : Parser API . , . ,
, . , , Perl XML, XML: : Parser.
2001 , . . XML: : Parser, .
-
7/25/2019 Perl & XML
59/208
XML::Parser 59
XML (JamesClark), , XML Expat 1. ,
, XML Perl. (Larry Wall) API
, XML : : Parser : : Expat. , XML : : Parser. .
(Clark Cooper) XML : : Parser XML .
XML : : Parser .
, Perl. XML , . , XML , Perl, ,
. , Expat. , Perl, ( ] Perl, XS 2, ), XML: : Parser Expat.
: XML: : Parser . , , XML: :Parser, 3.2.
3.2. , XML::Parser
use XML::Parser;my $xmlfile = shift @ARGV; # # ,my $parser = XML::Parser->new( ErrorContext => 2 );eval { $parser->parsefile( $xmlfile ); };# , # , .if( $@ ) {
$@ =~ s/at \/.*?$//s; # ,print STDERR "\nERROR in '$xmlfile':\n$@\n";
1 XML. W3C, . web http://www.jclark.com/. XSLT XPath. ]) nai 'mi web
http://www.w3.org/.2 perlxs.
-
7/25/2019 Perl & XML
60/208
6 0 3 XML:
} else {print STDERR "'$xmlfile' is well-formed\n";
> .
XML: : Parser, . , , . . , Expat. .
eval p a r s e f i l e Q . , XML: : Parser
, . eval , . . $@. , , .
ErrorContex t => 2. XML: : Parser , . , Expat.
, , , . , , ,
, . ,
( xwf, , XML well formedness ( X M L ) ) :
$ xwf ch0l.xmlERROR in ' chOl .xml ' :not well formed (invalid token) at line 66, column 22, byte 2354:Lions , Tigers & Bears
, . . , .
. , . , s () . NoExpand => 1, . , , .
-
7/25/2019 Perl & XML
61/208
XML::Parser 61
, , XML . , . .
XML: : Parser , . XML . . , , . ,] , . , style. :
(debug) STDOUT, ( ). pa r se () ;
(tree)
, . , ;
(object) , , . , , , XML ;
(subs) . , .
, . pkg. , , foobyO . , _fooby(), . , , . ;
(stream) Subs,
, XML , , . , , . ( , ),
-
7/25/2019 Perl & XML
62/208
62 3 XML:
. Hand lers se tHand le r sO;
# (Custom) XML: :Parser . API , . , XML: : Parser: :PerlSAX SAX.
3.3 , XML:: Parser. Style Tree. , XML . "
. 3.3. XML
use XML::Parser;# .$parser = new XML::Parser( Style => "Tree" );my $tree = $parser->parsefile( shift @ARGV );
# ,use Data::Dumper;print Dumper( $tree );
lipii p a r s e f i l e O , , . Data: : Dumper , . 3.4 .
3.4. XML
Courier9
Times New Roman14
Helvetica16
( ):
Stree = ['preferences ' , [
{}, 0. '\n' ,'font' , [
{ 'role' => 'console' }, 0, '\',
-
7/25/2019 Perl & XML
63/208
: 63
'size' , [ {}, 6, '9' ], 0, '\' ,'fname', [ {>, 0, 'Courier' ], 0, '\n'
], 0, '\n' ,'font' , [
{ 'role' => 'default' }, 0, '\n','fname', [ {}, 0, 'Times New Roman' ], 0, '\n','size' , [ {} , 0, '14' ] , 0, '\n'
], 0, '\n' ,'font' , [
{ 'role' => 'titles' } , 0, '\n' ,'size' , [ {}, 0, '10' ] , 0, ' \n' ,'fname', [ {}, 0, 'Helvetica' ], 0, '\n',
] , 0, '\n' ,
, , . , , XML. 4 XML : : Parser, 6 .
:
, Perl: ? XML. , , . , . , . .
XML . : . , , XML . , XML . . . ,
, , , XML . , . , .
, .
-
7/25/2019 Perl & XML
64/208
64 3 XML:
, .
, , , . , , , . , XML . , Perl , , . , ,
. , .
, ?
. . . , , . , , . , , , , . .
, .
, . , . , , , . . , , .
, , , , .
. , , , . , , , . XML :: Parser .
XML XML . , . XML , .
, . . 4 , , . 6
-
7/25/2019 Perl & XML
65/208
65
. 8 , .
. . , ( 3.3). . XML ,
. , , , XML.
XML: : Parser ( Expat). Expat , . XML , , , , , , , . .
, . , . Handlers . 3.5. 3.5. XML ,
use XML::Parser;# .
my $parser = XML::Parser->new( Handlers =>{
Start=>\&handle_start,End=>\&handle_end,
} );$parser->parsefile( shift @ARGV );my @element_stack; # .
# : # .#sub handle_start {
my( $expat, $element, %attrs ) = @_;# expat ,my $line = $expat->current_line;
-
7/25/2019 Perl & XML
66/208
66 3 XML:
print "I see an $element element starting on line $line!\n";# # .push( @element_stack, { element=>$element, line=>$line });
if( %attrs ) {print "It has these attributes: \n";while( my( $key, $value ) = each( %attrs )) {
print "\t$key => $value\n";
# .#sub handle_end {
my( $expat, $element ) = @_;
# ,# , # , # XML::Parser " " # , my $element_record = pop( @element_stack );print "I see that $element element that started on line ",
$$element_record{ line }, " is closing now.\n";}
, , . , h a n d l e _ s t a r t ( )
handle_end() , new(). parse() . , , , . , ,
. ,
$expat. L : :Parse r : : Expat, Expat.
, (, ).
. ?
, 1.1:
I see a spam document element starting on line 1!It has these attributes:
version => 3.5timestamp => 2002 05 13 15:33:45
I see a customer element starting on line 3!I see a first name element starting on line 4!I see that the fi rst name element that started online4 is closing now.I see a surname element starting on line 5!
-
7/25/2019 Perl & XML
67/208
67
I see that the surname element t ha t s ta rt ed on l i n e 5 is c lo si ng now.1 see a address element starting on line 6!I see a street element starting on line 7!I see tha t the st re et element that s ta rte d on li ne 7 is c losi ng now.I see a city element starting on line 8!I see that tiie city element that started on line 8 is closing now.I see a state element starting on line 9!I see that the state element that started on line 9 is closing now.I see a zip element starting on line 10!I see that the zip element that started on line 10 is closing now.1 see that the address element that sta rted on l in e 6 is cl os in g now.I see a email element starting on line 12!I see that the email element that s ta rt ed on l ine 12 is c lo si ng now.I see a age element starting on line 13!I see that the age element that started on line 13 is closing now.I see that the customer element that sta rted on l in e 3 is cl os ing now.
[ . .. snipping other customers for bre vi ty 's sake . . . ]I see that the spam document element that st art ed on line 1 i s clo sing now. .
. , , XML : : Parser : : Expat. , , . . ,
. . ,
, . . .
, Simple API, XML (SAX).
, . , W3C. . . , . , ( , XML : : SAX , ). XML ( , ' ). SAX Perl,
!! . . SAX ( PcrlSAX) 5.
-
7/25/2019 Perl & XML
68/208
6 8 3 XML:
XML::LibXML XML: :LibXML, XML: : Parser, . GNOME 1 1 i bxml 2. XML: :Parser, , XML . (Document Object Model, DOM).
DOM XML . , SAX . , . DOM 7.
. , . , . . , , 3.5. .
3.(3.
, . DOM, . , , .
3.6. use XML::LibXML;use IO::Handle;# ,my $parser = new XML::LibXML;
# ,my $fh = new IO::Handle;if( $fh->fdopen( fileno( STDIN ), "r" )) {
my $doc = $parser->parse_fh( $fh );my %dist;&proc_node( $doc->getDocumentElement, \%dist );foreach my $item ( sort keys %dist ) {
print "$item: ", $dist{ $item }, "\n";}$fh->close;
}# XML : .
' web http://www.libxml.org/.
-
7/25/2019 Perl & XML
69/208
XML:LibXML 69
# # .#sub proc_node {
my( $node, $dist ) = @_;return unless( $node->nodeType eq &XML_ELEMENT_NODE );$dist->{ $node->nodeName } ++;foreach my $child ( $node->getChildnodes ) {
&proc_node( $child, $dist );
, , 10: : Handle. , ,
Perl , : ,
, , , , . , , . XML , .
XML , .
. , . . , proc_node(). , , . , XML , , : , , .
, (, ).
, getChi Idnodes (), . , .
, . . ,
. . , . , , . , XML DocBook:
-
7/25/2019 Perl & XML
70/208
70 3 XML:
$ xfreq < ch03.xmlchapter: 1citetitle: 2firstterm: 16
footnote: 6foreignphrase: 2function: 10itemizedlist: 2listitem: 21literal: 29note: 1orderedlist: 1para: 77programlisting: 9replaceable: 1screen: 1section: 6sgmltag: 8simplesect: 1systemitem: 2term: 6title: 7variablelist: 1varlistentry: 6xref: 2
, . , .
XML::XPath , . . , . . , , .
XPath. XML 1.
, . . XPalh 8, , Unix. , !) .
XML : : XPath, (Mat t Sergeant) , , XML: :Parser.
XPath , ,
http://www.w3.org/TR/xpath/.
-
7/25/2019 Perl & XML
71/208
XML: :XPath 71
. ,
, ,
XML :
Bob Snob
123 Platypus LaneBurgopolis /city>FL12345
< >
, . 3.7 , XPath.
3.7. use XML::XPath;
my $file = 'customers.xml';my $xp = XML::XPath->new(filename=>$file);
# XML::XPath ,# XML # Xpath : # , # , ,my $nodeset = $xp->find('//zip');
my @zipcodes; # if (my @nodelist = $nodeset->get_nodelist) {
# ! # XML::XPath::Node::Element. # 'string_value' , # .@zipcodes = map($_->string_value, @nodelist);# @zipcodes = sort(@zipcodes);
local $" = "\n";print "I found these zipcodes:\n@zipcodes\n";
} else {print "The file $file didn't have any 'zip' elements in it!\n";
} , ,
, :I found these zipcodes:036421233382649
, . , .
-
7/25/2019 Perl & XML
72/208
72 3 XML:
XPath. , .
8. XML : : LibXML f i n d n o d e sQ , XML: :XPath. Element , . 10.
XML . XML
. , , XML , . , , , . .
. ,
. . , DTD, .
, XML . DTD , XML (XHTML web , M a t hML
). .
, Perl XML , XML , .
, , .
XML, . .
DTD (Document type description, DTD) , , XML
-
7/25/2019 Perl & XML
73/208
73
. , XML. , , . < ! : , ,
. 3.8 DTD.
3.8. DTD< ELEMENT memo (to, from, message)>< ENTITY % text only "(#PCDATA)*">< ELEMENT to %text only;>< ELEMENT from %text only;>< ELEMENT emphasis %text only;>< EN~ITY myname "Bartholomus Chiggin McNugget">
DTD , , , , , .
. :
< DOCTYPE memo SYSTEM "/dtdstuff/memo.dtd">
Sara Bellum&myname;Stop reading memos and get back to work
,
. . .
, DTD . , XML DTD. XML: : Li bXML . , , 3.9.
3.9. , use XML::LibXML;use IO::Handle;
# ,my $parser = new XML::LibXML;
# ,my $fh = new IO::Handle;if( $fh->fdopen( fileno( STDIN ), "r" )) {
my $doc = $parser->parse_fh( $fh );if( $doc and $doc->is_valid ) {print "Yup, it's valid.\n";
} else {print "Yikes! Validity error.\n";
-
7/25/2019 Perl & XML
74/208
74 3 XML:
$fh >c lose ;} ,
. , , (, , ). 1. XML.: : Checker, . . (. J. Mather). , .
DTD , , , . , , :] ? XML Schema. XML Schema DTD , , .
2 , XML Schema
( , ) XML , VV3C. !! , XML Schema .
XML Schema RelaxNG, OASIS Opcn (http://www.oasis open.org/committees/relaxng/), Scheinatron, (Rick jclliffe) (http://www.ascc.net/xml/resource/schematron/schematron.html). L Schema,
XML , XML , , ,
XML . Schematron , Perl , ( XML : : Schematron, ' (Kip H a m p t o n ) ) .
Scheinatron , Perl XML. , XML , Perl . ,
' . , ,n.sgmls. / web http://www.jdark.com. \\ eb , http://www.stg.brown.edu/service/xmlnalid. , XML DOCTY'PE. 1/RL, , .
-
7/25/2019 Perl & XML
75/208
XML: :Writer 75
, XPath . , , ,
. , ( , XPath ). , Schematron, XSLT. XSLT. Perl XML : : Schematron
, , XSLT . XSLT .
Schematron , , , W3C. Perl , ,
( XML : :XPath). Perl , . , , , . Perl .
XML::Writer , , XML . , . , .
L . , , . , XML , , , . ? ? ? , , . .
XML : :Wri ter, (David Megginson), , XML . , XML . , , , XML . . 3.1.
-
7/25/2019 Perl & XML
76/208
76 3 XML:
3.1 XML:. Writer
end ()
xmlDec l ( [$endod ing , S s t a n d a l o n e ] )
d o c t y p e ( $ n a m e , [ $ p u b l i c l d ,S s y s t e m I D ] )
c o m m e n t ( $ t e x t )p i ( $ t a r g e t [ , $ d a t a ] )startTag($name [, $anamel =>S v a l u e l , . . . ] )
emptyTag($name [, Sanamel =>$v a lu e l , . . . ] )
endTag([$name])
d a t a E l e m e n t ( $ n a m e , $data [,$aname => $ v a l u e l , ...] ) $aname =>$va lue l , . . . ] )
c h a r a c t e r s ( S d a t a )
(.. , ). UNSAFE,
XML . "1.0"
XML . , . , startTagO . , , . ,
XML . , 3.10 HTML . 3.10. HTML
use IO;my $output = new IO::File(">output.xml");
use XML::Writer;my $writer = new XML::Writer( OUTPUT => $output );
$writer->xmlDecl( 'UTF-8' );
$writer->doctype( 'html' );
$writer->comment( 'My happy little HTML page' );
$writer->pi( 'foo', 'bar' );
$writer->startTag( 'html' );
$writer->startTag( 'body' );
$writer->startTag( 'h1' );
$writer->startTag( 'font', 'color' => 'green' );
$writer->characters( "<Hello World!>" );
$writer->endTag( );
$writer->endTag( );
$writer->dataElement( "p", "Nice to see you." );
$writer->endTag( );
$writer->endTag( );
$writer->end( );
-
7/25/2019 Perl & XML
77/208
XML: :Writer 77
:
< DOCTYPE html>
Nice to see you .
. ,(, , &) . . , , , wi t h i n_e lenient ("f oo"). ,
"f " .
. XML . NEWLINES t rue, . DATA_MODE, . DATA_MODE DATA_INDENT . ,
.
. XML: : Write
. , ( 3.11).
3.11. use XML::Writer;my $wr = new XML::Writer( DATA_MODE => 'true', DATA_INDENT => 2 );&as_xml( shift @ARGV );$wr->end;
# XML .#sub as_xml {
my $path = shift;return unless( -e $path );
# , # .if( -d $path ) {
$wr->startTag( 'directory', name => $path );
# # .my @contents = ( );opendir( DIR, $path );while( $item = readdir( DIR )) {
next if( $item eq '.' or $item eq '..' );push( @contents, $item );
}closedir( DIR );
-
7/25/2019 Perl & XML
78/208
78 3 XML:
# ,foreach my $item ( @contents ) {
&as_xml( "$path/$item" );}
$wr->endTag( 'directory' );
# , # .} else {
$wr->emptyTag( 'file', name => $path );
]) ( DATA_M0DE DATA_INDENTc
):
$ ~/bin/dir /home/eray/xtools/XML DOM 1.25
< f i l e name="/home/eray/xtools/XML DOH l.25/t/astress,t" />< f i l e name="/home/eray/xtools/XML DOM 1.25/t/modify.t" !>
< f i l e name="/riome/eray/xtools/XML DOM 1.25/DOM.gif" />
< f i l e name="/home/eray/xtools/XML DOM 1.25/CheckAncestors.pm" />
, XML : : Wr i t e r , . , XML
. . , .
XML : :Wri ter .
, , , , XML .
, XML : : L i toS t r i ng () , . , , , API .
-
7/25/2019 Perl & XML
79/208
79
. , .
, , , print Perl.
>> , , XML : :Wri ter. XML,
, . , print . 10.
-
7/25/2019 Perl & XML
80/208
80 3 XML:
. , .
Unicode . , , , , . Unicode , .
Unicode , Unicode , .
Unicode , , , , Unicode. , 0x0041 ( ASCII ). , , , 0031 0263, . . , . .
Unicode. .
Unicode , UTF ( Unicode Transformation Format), , ( ) . : UTF 8, UTF 16 UTF 32. UTF 8 Perl .
UTF 8 UTF 8 Perl.
, . , UTF 8 XML : XML , UTF 8.
, UTF 8, , ( ). , 0x41, . 0263. . ,
, ,
-
7/25/2019 Perl & XML
81/208
81
.
UTF 16 U T F 1 6 . , (
UTF 8) . , , ( ). .
Unicode 2.0 , 16 , , Unicode, Unicode UTF 16. , Save
As..., Unicode UTF 8, Unicode 3.2.
UTF 32 UTF 32 UTF 16, , , UTF 32 . , . . Unicode. UTF 32 Unicode, XML . , .
XML 21 ,
( , UTF 8 UTF 16). 150 8859 1 (ASCII 128 , ) S h i f t_J IS ( Microsoft ). Unicode, Unicode ( , ).
XML Perl . . , XML::Parser . ,
Expat , Unicode. , , XML: : E n c o d i n g (Clark Cooper). , , XML: : P ar se r , (XML ), , , Unicode.
-
7/25/2019 Perl & XML
82/208
82 3 XML:
Unicode Perl XML, Perl Unicode , 1. , Unicode
Perl 5.6 . Perl, man perlunicode,
Unicode. Perl, . , , Perl XML Unicode, .
Perl, 5.6.1, Unicode. utf8 Perl UTF 8 . Perl UTF 8, ASCII . , , .
Perl 5.8 Unicode, UTF 8 . 5.8 Encode, Perl. Perl Unicode:
use Encode "from_to";
from_to($data, "iso-8859-3", "utf-8"); # $data is converted in place to utf-8
, Perl 6 , Perl . , Unicode . , .
Perl 5.8 , .
, , .
iconv Text::Iconv , iconv , Windows Unix ( Mac OS X). , . Unix:
$ iconv -f latin1 -t utf8 my_file.txt > my_unicode_file.txt
iconv CPAN Perl Text: : Iconv, Perl API,
1 , , , , , Perl . .
-
7/25/2019 Perl & XML
83/208
83
. .
Unicode: rString , Unicode: :String. . API , API. ,
. , , ' , , . 3.12 .
3.12. Unicode::String
use Unicode::String;
my $string = "This sentence exists in ASCII and UTF-8, but not UTF-16. Darn!\n";
my $u = Unicode::String->new($string);
# $u now holds a Unicode::String object built from $string.
# The ordinary .= operator is applied to the object directly.
$u .= "\n\nOh, hey, it's Unicode all of a sudden. Hooray!!\n";
# Print as UTF-16 (UCS2).
print $u->ucs2;
# Print as UTF-8.
print $u->utf8;
, . , utf7 UTF 8.
, XML : : Parser Unicode. , , Unicode, UTF 8. , . XML: : P a rs e r , .
XML , , ,
(byte order mark, BOM).
. , Unicode UTF 16 UTF 32, ( UTF 8 ). , ( , ), , .
-
7/25/2019 Perl & XML
84/208
84 3 XML:
Unicode , U+FEFF. Unicode, UTF 16 UTF 32 '. , , 0xFE 0xFF , , UTF 16. , , Unicode, U+FFFE. ( UTF 32 : 0x00 0x00 0xFE 0xFF and 0xFF 0xFE 0x00 0x00, .)
XML , , UTF 16 UTF 32, . , Unicode , . , , , . , , ,
. , :
open XML_FILE, $filename or die "Can't read $filename: $!";
my $bom; # receives the first two bytes of the file
read XML_FILE, $bom, 2;
# Get the numeric value of each byte with ord().
my $ord1 = ord(substr($bom,0,1));
my $ord2 = ord(substr($bom,1,1));
if ($ord1 == 0xFE && $ord2 == 0xFF) {
    # FE FF: UTF-16 byte order mark, big-endian.
    # ...
} elsif ($ord1 == 0xFF && $ord2 == 0xFE) {
    # FF FE: UTF-16 byte order mark, little-endian.
} else {
    # No byte order mark found.
}
, XML . ,
, XML .
' UTF 8 , . XML .
-
7/25/2019 Perl & XML
85/208
, XML, , , XML .
. , . Simple API for XML (SAX).
. ( , ). , ,
. , . , , , .
, .
:
; ;
( ). , XML .
, , . , , ,
-
7/25/2019 Perl & XML
86/208
86 4
. XML , .
XML , ; , XML. , , , , . , XML, . , , . XML , , ?
, , . XML , . , SGML , . . , ,
.
, ? ? , XML ( ). . , , . , , .
, ( ). XML .
, . , ,, . , . XML . . .
, . , , ,
-
7/25/2019 Perl & XML
87/208
87
, , ( 4.1).
4. 1. XML
peanut butter and jelly sandwich
Gloppy brand peanut butter
bread
jelly
Spread peanut butter on one slice of bread.
Spread jelly on the other slice of bread.
Put bread slices together, with peanut butter and
jelly touching.
:
( , );
; ; peanut butter and jelly sandwich;
;
add picture of sandwich here;
;
; Gloppy; t rade;
brand peanut butter*;
.
... , .
, . , . , , . , .. i f, ,
. ( XML: : Parser). , . .
-
7/25/2019 Perl & XML
88/208
88 4
XML ,
, . . , , . , XML .
, L IIOTOK , . , . ,
. . , XML : , , . , .
, , . . XML . XML SAX. XML , Perl , . , . , SAX ,
. 5.
XML . :
, ., , . , ;
, ,
. , , , . ( ) ;
-
7/25/2019 Perl & XML
89/208
XML::PYX 89
. ,
: , ; , . , ;
XML . , DocBook XML HTML. .
XML , . , , . . : , , 10 , 12 ?. , . . , : , 6.
, XML , , XML .
XML::PYX Perl API . CPAN, , . , . XML, , Perl .
XML Perl . , , , . , .
. , . XML : : PYX
. , , , , .
-
7/25/2019 Perl & XML
90/208
90 4
, XML. , XML , .
PYX XML , , Perl. XM L , Unix (, avvk grep) . PYX ( Perl).
. 4.1 PYX .
4.1 PYX
( )
?
PYX , , . 4.1. . , Perl .
11 XML PYX , . XML :
<shoppinglist>
  <!-- -->
  <item>toothpaste</item>
  <item>rocket engine</item>
  <item optional="yes">caviar</item>
</shoppinglist>
PYX :
(shoppinglist
-\n
(item
-toothpaste
)item
-\n
(item
-rocket engine
)item
-\n
(item
Aoptional yes
-caviar
)item
-\n
)shoppinglist
, PYX . , PYX ,
-
7/25/2019 Perl & XML
91/208
XML:rParser 91
. , , CDATA. , . PYX, , .
(Matt Sergeant) , XML : : PYX, XML, PYX . XML ,
.
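4.2. PYX
use XML::PYX;
# Create the parser and convert the file named in $ARGV[0] to PYX.
my $parser = XML::PYX::Parser->new;
my $pyx;
if (defined ( $ARGV[0] )) {
    $pyx = $parser->parsefile( $ARGV[0] );
}
# Print the remainder of every chunk that begins with '-'.
# NOTE(review): PYX is line-oriented; confirm the split pattern
# (/ / as extracted vs. /\n/) against the original listing.
foreach( split( / /, $pyx )) {
    print $' if( /^-/ );
}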
, PYX , XML , SAX D O M .
, , , , . , .
XML::Parser , CPAN, XML: : Pa r ser (
3.). . XML : : Parser ,
XML . , , . . HTML .
4.3. < l i s t> , (, ).
4.3.
ThadeusWrigley716 5O5 9910
-
7/25/2019 Perl & XML
92/208
92 4
105 Marsupial CourtFairportNY14450
JillBaxter
818 S. Rengstorff Avenue94040MountainviewCA
217 302 5455
Riccardo
Preston707 Foobah DriveMudhutOR32777
lll 222 333
10 Jiminy l_aneScrapheepPA99001
Benn5alter611 328 7578
st>
. . , . , . (
, ). 4.4. , . . , , .
, XML. . . , .
-
7/25/2019 Perl & XML
93/208
XML::Parser 93
. : , , . , , , .
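4.4.
# Create the parser and register the handler routines.
use XML::Parser;
my $parser = XML::Parser->new( Handlers => {
    Init  => \&handle_doc_start,
    Final => \&handle_doc_end,
    Start => \&handle_elem_start,
    End   => \&handle_elem_end,
    Char  => \&handle_char_data,
} );

# Globals shared by the handlers.
my $record;   # hash ref collecting the fields of the current entry
my $context;  # name of the most recently opened element
my %records;  # finished entries, keyed by last name . first name

# Parse the file named on the command line, or else standard input.
my $file = shift @ARGV;
if( $file ) {
    $parser->parsefile( $file );
} else {
    my $input = "";
    while( <STDIN> ) { $input .= $_; }
    $parser->parse( $input );
}
exit;

###
### Handlers
###

# Start of document: print the opening of the HTML page.
# NOTE(review): the HTML tags inside these strings were lost in
# extraction and are reconstructed; confirm against the original listing.
sub handle_doc_start {
    print "<html><head><title>addresses</title></head>\n";
    print "<body><h1>addresses</h1>\n";
}

# Start of element: remember its name; begin a new record on 'entry'.
sub handle_elem_start {
    my( $expat, $name, %atts ) = @_;
    $context = $name;
    $record = {} if( $name eq 'entry' );
}
#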
-
7/25/2019 Perl & XML
94/208
94 4
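#
sub handle_char_data {
    my( $expat, $text ) = @_;

    # Escape the characters that are special in HTML.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;

    # Append the text to the field named after the current element.
    $record->{ $context } .= $text;
}

# End of element: when an 'entry' closes, file the finished record.
sub handle_elem_end {
    my( $expat, $name ) = @_;
    return unless( $name eq 'entry' );
    my $fullname = $record->{'last'} . $record->{'first'};
    $records{ $fullname } = $record;
}

# End of document: print the records as a table, in sorted key order.
# NOTE(review): the HTML tags inside these strings were lost in
# extraction and are reconstructed; confirm against the original listing.
sub handle_doc_end {
    print "<table border='1'>\n";
    print "<tr><th>name</th><th>phone</th><th>address</th></tr>\n";
    foreach my $key ( sort( keys( %records ))) {
        print "<tr><td>" . $records{ $key }->{ 'first' } . ' ';
        print $records{ $key }->{ 'last' } . "</td><td>";
        print $records{ $key }->{ 'phone' } . "</td><td>";
        print $records{ $key }->{ 'street' } . ', ';
        print $records{ $key }->{ 'city' } . ', ';
        print $records{ $key }->{ 'state' } . ' ';
        print $records{ $key }->{ 'zip' } . "</td></tr>\n";
    }
    print "</table>\n</body>\n</html>\n";
}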
. , XML : : Parser,
expat. , (, ). . , , , , .
. ( ), , .
, . , .
handle_doc_ s t a r t .
-
7/25/2019 Perl & XML
95/208
XML::Parser 95
. HTML , HTML , . .
, handle_elem_start, , . expat, : $name, , %atts . ( , . , @atts.) , .
, Scontext. , , . %record, , .
handle_char_ data , , , . , $text. , $record >{ Scontext }. , .
XML: :Parser , , '. , , . , .
, handle_elem_end , . ( ). , . , . , . . ( ). , HTML .
handle_doc_end, , . , , , .
1 Perl. XML . L ( ) .
-
7/25/2019 Perl & XML
96/208
96 4
HTML , .
, , XML
. , . , , .
-
7/25/2019 Perl & XML
97/208
SAX
XML:: Parser XML , Perl XML. , . , . , . , . XML: : Parser, . , .
, , . , . , . , , , .
XML , SAX. SAX XML DEV. (DavidMegginson) 1 . SAX 1 ( SAX1), , . , , CDATA. , SAX2, , XML .
1 web , SAX, http://www.saxproject.org.
-
7/25/2019 Perl & XML
98/208
98 5 SAX
SAX . . XML Java, SAX . ,
( ). SAX Perl.
CPAN, , , . Perl , Java. .
Java , , a Perl . .
SAX Perl XML: : Parse : :PerlSAX (Ken McLeod). XML : : Parser, Expat, SAX .
SAX SAX , SAX . . 5.1 . SAX
, . , .
5.1 PerlSAX
start_document
end_document
start_element
end_element
characters
processing_instruction
comment
start_cdata
end_cdata
entity_reference
( ) ( ) ( ) ( ) Name,
At t r ibu te s , Name , Data ( ) Targe t , Data
Data CDATA ( )( )
CDATA ( ) Name, Value( , , )
-
7/25/2019 Perl & XML
99/208
SAX 99
:
s t a r t _ e l e m e n t () end_element (),
; cha rac t er s ()
, (, , );
char acters ()( , );
, CDATA . ,
, . CDATA characters (), ;
s t a r t _c da t a ( ) end_cdat a() . , ,
, c h a r a c t e r s ( ) ;
en t i ty_ref erence () , . enti ty_ref erence (), .
. , , . :
XiVlL ; ; , < l i t e r a l > ,
.
5.1. , , , , MyHandler. , , ,
.
5.1.
# Create the parser, handing it a MyHandler object to receive the events.
use XML::Parser::PerlSAX;
my $parser = XML::Parser::PerlSAX->new( Handler => MyHandler->new( ) );
if( my $file = shift @ARGV ) {
    $parser->parse( Source => {SystemId => $file} );
} else {
    my $input = "";
-
7/25/2019 Perl & XML
100/208
100 5 SAX
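    while( <STDIN> ) { $input .= $_; }
    $parser->parse( Source => {String => $input} );
}
exit;

# Globals shared by the handler methods.
my @element_stack;   # names of the currently open elements
my $in_intset;       # flag: set while the internal subset is still open

###
### Handler package
###
package MyHandler;

# Constructor: an empty blessed hash.
sub new {
    my $type = shift;
    return bless {}, $type;
}

# Start of element: print the start tag and its attributes.
sub start_element {
    my( $self, $properties ) = @_;

    # Close the internal subset if it is still open.
    output( "]>\n" ) if( $in_intset );
    $in_intset = 0;

    # Remember the element name.
    push( @element_stack, $properties->{'Name'} );

    # Print the tag unless this is a 'literal' inside a 'programlisting'.
    # NOTE(review): everything between the opening "<" and closing ">"
    # strings was lost in extraction; the attribute loop is reconstructed
    # and must be confirmed against the original listing.
    unless( stack_top( 'literal' ) and
            stack_contains( 'programlisting' )) {
        output( "<" . $properties->{'Name'} );
        my %attributes = %{$properties->{'Attributes'}};
        foreach( keys( %attributes )) {
            output( " $_=\"" . $attributes{$_} . "\"" );
        }
        output( ">" );
    }
}

# End of element: print the end tag under the same condition as above.
sub end_element {
    my( $self, $properties ) = @_;
    output( "</" . $properties->{'Name'} . ">" )
        unless( stack_top( 'literal' ) and
                stack_contains( 'programlisting' ));
    pop( @element_stack );
}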
-
7/25/2019 Perl & XML
101/208
SAX 101
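# Character data: re-escape markup characters before printing.
sub characters {
    my( $self, $properties ) = @_;
    my $data = $properties->{'Data'};
    $data =~ s/\&/\&amp;/;
    $data =~ s/</\&lt;/;
    $data =~ s/>/\&gt;/;
    output( $data );
}

# Comment: print its text wrapped in a <comment> element.
sub comment {
    my( $self, $properties ) = @_;
    output( "<comment>" . $properties->{'Data'} . "</comment>" );
}

# Processing instruction: print nothing.
sub processing_instruction {
    # do nothing!
}

# Entity reference: print it back unresolved, as &name;
sub entity_reference {
    my( $self, $properties ) = @_;
    output( "&" . $properties->{'Name'} . ";" );
}

# True if the innermost open element has the given name.
sub stack_top {
    my $guess = shift;
    return $element_stack[ $#element_stack ] eq $guess;
}

# True if any open element has the given name.
sub stack_contains {
    my $guess = shift;
    foreach( @element_stack ) {
        return 1 if( $_ eq $guess );
    }
    return 0;
}

# Single exit point for everything the filter prints.
sub output {
    my $string = shift;
    print $string;
}
 , ,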
$sel f, . . , , : , .
-
7/25/2019 Perl & XML
102/208
1 0 2 5 SAX
, XML . , '.
, , . , . , ( ) , , .
. 5.2.
5.2. < DOCTYPE book
SYSTEM "/usr/local/prod/sgml/db.dtd"[
]>
GRXL in a Nutshell
What is GRXI_?
Yet another acronym. That was our attitude at first, but then we sawthe amazing uses of this new technology calledGRXL. Consider the following program:
AH aof {{{{{{ let x = 0 }}}}}}
print wowor not
What does it do? Who cares? It's just lovely to look at. In fact,I'd have to say, "&thingy;".
1
, di f f , . XML: : Semant icDi f f (Kip Hampton). ,
.
-
7/25/2019 Perl & XML
103/208
DTD 1 0 3
, , 5.3.
5.3.
GRXL in a NutshelK/1 iU e>
What is GRXL? need a better title
Yet another acronym. That was our attitude at first, but then we sawthe amazing uses of this new technology calledGRXL . Consider the following program:
AH aof
{{{{{{ let x = 0 }}}}}}print wowor not what font should we use?
What does it do? Who cares? It's just lovely to look at. In fact,I'd have to say, "&thingy;".
, .
XML . < l i t e r a l > , , , , . , . . . XML
, . thingy . , .
DTD XML : : Parser : : PerlSAX , DTD . , , XML
, . . (, ), . , .
. , , , . . 5.2.
-
7/25/2019 Perl & XML
104/208
104 5 SAX
5.2 PerlSAX DTD
e n t i t y _ d e c l ( , )
n o t a t i o n _ d e c l
un pars ed_ ,e n t i t y _ d e d
(, )element _de cl a t t l i s t _ d e c l
do ct ype_de cl
x m l _ d e c l XML
Name, Value , P u b l i c l d ,S y s t e m l d , N o t a t i o n
Name, Publicld,Systemld, BaseName, Publicld,Systemld, Base
Name, ModelElementName,A t t r i b u t e N a m e ,
Type, FixedName, Systemld,P u b l i c l d , I n t e r n a l V e rs io n , Encoding,Standalone
en t i ty_decl () , . , , en t i ty_decl (), unparsed_ent i ty_decl ( ) , .
ent i ty_dec I () . Value , . Publ i c i d System i d, XML , , , . Base , URL , Systemi d .
DTD, . ,
"date" , XML , . XML , .
Model el emen t_dec l () . , DTD .
, XML . Name
. Publ ic id
Systemid DTD. In t erna l , .
-
7/25/2019 Perl & XML
105/208
DTD 105
, , , . ,
5.4. 5.4.
# XML declaration: print it back with whichever fields are present.
sub xml_decl {
    my( $self, $properties ) = @_;
    output( "<?xml version=\"" . $properties->{'Version'} . "\"" );
    my $encoding = $properties->{'Encoding'};
    output( " encoding=\"$encoding\"" ) if( $encoding );
    my $standalone = $properties->{'Standalone'};
    output( " standalone=\"$standalone\"" )
        if( $standalone );
    output( " ?>\n" );
}

# Document type declaration: print name, identifiers and, if there is an
# internal subset, the opening bracket (closed later via $in_intset).
sub doctype_decl {
    my( $self, $properties ) = @_;
    output( "\n<!DOCTYPE " . $properties->{'Name'} .
            "\n" );
    my $pubid = $properties->{'PublicId'};
    if( $pubid ) {
        output( " PUBLIC \"$pubid\"\n" );
        output( " \"" . $properties->{'SystemId'} . "\"\n" );
    } else {
        output( " SYSTEM \"" . $properties->{'SystemId'} . "\"\n" );
    }
    my $intset = $properties->{'Internal'};
    if( $intset ) {
        $in_intset = 1;
        output( " [\n" );
    } else {
        output( ">\n" );
    }
}

# Entity declaration: print it back as PUBLIC, SYSTEM or a literal value.
sub entity_decl {
    my( $self, $properties ) = @_;
    my $name = $properties->{'Name'};
    output( "<!ENTITY $name " );
    my $pubid = $properties->{'PublicId'};
    my $sysid = $properties->{'SystemId'};
    if( $pubid ) {
        output( "PUBLIC \"$pubid\" \"$sysid\"" );
-
7/25/2019 Perl & XML
106/208
106 5 SAX
    } elsif( $sysid ) {
        output( "SYSTEM \"$sysid\"" );
    } else {
        output( "\"" . $properties->{'Value'} . "\"" );
    }
    output( ">\n" );
}
, (. 5.5).
5 5 < DOCTYPE book
SYSTEM "/usr/local/prod/sgml/db.dtd"
GRXL in a NutshelWti tle>
What is GRXL? need a better title
Yet another acronym. That was our attitude at first, but then we saw theamazing uses of this new technology calledGRXL. Consider the following program:
AH aof
{{{{{{ let x = 0 }}}}}}print wowor not what font should we use?
What does it do? Who cares? I t ' s jus t lovely to look at . In fact ,I'd have to say. "St th ingy;" .
. .
, DTD , .
. , , , , , . ent i ty_ref ere nce () .
, . . , , " ?
-
7/25/2019 Perl & XML
107/208
107
, . , , XML ,
. . :
]>
The Bonehead Cookbook&i tro chapter;&pasta chapter;&stirfry chapter;&soups chapter;
, . , . , , resolve_enti ty ().
: Name ; Systemic P u b l i c Id , , . Base, URL , . , . undef
, . , , . , parse (), Systemld URL, a S t r i ng . :
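sub resolve_entity {
    my( $self, $props ) = @_;
    if( exists( $props->{ SystemId }) and
        open( ENT, $props->{ SystemId })) {
        # NOTE(review): the two '' literals may have held markup that was
        # lost in extraction; confirm against the original listing.
        my $entval = '';
        while( <ENT> ) { $entval .= $_; }
        close ENT;
        $entval .= '';
        return { String => $entval };
    } else {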
-
7/25/2019 Perl & XML
108/208
108 5 SAX
        return undef;
    }
}
. , , . , .
, XML , , , XML . SAX. , , XML , . SAX SAX , . , . SAX , . ,
, . , XML : : SAXDri ver: : Excel (Ilya Ster in) Microsoft Excel XML . ,
. Spreadsheet : :ParseExcel , XML: : SAXDri ver: :Excel SAX . XML .
Excel:
1 552 333 124 77
SAX , . , , . , 5.6.
5.6. Excel
use XML::SAXDriver::Excel;
# Take the input file name from the command line.
die( "Must specify an input file" ) unless( @ARGV );
my $file = shift @ARGV;
print "Parsing $file...\n";
-
7/25/2019 Perl & XML
109/208
, XML 109
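# Create the handler and the driver.
my $handler = new Excel_SAX_Handler;
my %props = ( Source => { SystemId => $file },
              Handler => $handler );
my $driver = XML::SAXDriver::Excel->new( %props );
# Run the conversion.
$driver->parse( %props );

# Handler package: prints XML as the SAX events arrive.
# NOTE(review): the tags inside the print strings were lost in extraction
# and are reconstructed; confirm against the original listing.
package Excel_SAX_Handler;

# Constructor: a blessed hash holding the arguments passed in.
sub new {
    my $type = shift;
    my $self = {@_};
    return bless( $self, $type );
}

# Start of document: open the outermost element.
sub start_document {
    print "<doc>\n";
}

# End of document: close the outermost element.
sub end_document {
    print "</doc>\n";
}

# Character data: print it when it is defined.
sub characters {
    my( $self, $properties ) = @_;
    my $data = $properties->{'Data'};
    print $data if defined($data);
}

# Start of element: print the start tag.
sub start_element {
    my( $self, $properties ) = @_;
    my $name = $properties->{'Name'};
    print "<$name>";
}

# End of element: print the end tag.
sub end_element {
    my( $self, $properties ) = @_;
    my $name = $properties->{'Name'};
    print "</$name>";
}
 , ,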
SAX. , . , , :
baseballs55
tennisbal\s33
-
7/25/2019 Perl & XML
110/208
110 5 SAX
pingpong balls12
footballs77
Use of uninitialized value in print at conv line 39.
Use of uninitialized value in print at conv line 39.
. , , , . , . ( . s t a r t _ d o c u m e n t () end_document () .) < r e c o r d > . , . .
, SAX . , . , . (, ) . , , .
SAX ,
. s t a r t_e I emen t () , , , . ? XML : : Handler : : Subs (Ken MacLeod).
, . < t i t l e > , .
s_, ( ).
, _. .
, . $ se l f > {Names} . i n_e l emen t ($name) , , $name.
-
7/25/2019 Perl & XML
111/208
1 1 1
, . , HTML , , , (, ). , 5.7, .
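5.7. ,
use XML::Parser::PerlSAX;
use XML::Handler::Subs;

# Create the parser with an H1_grabber handler and parse the named file.
use XML::Parser::PerlSAX;
my $parser = XML::Parser::PerlSAX->new( Handler => H1_grabber->new() );
$parser->parse( Source => {SystemId => shift @ARGV} );

# Handler package: H1_grabber, a subclass of XML::Handler::Subs.
package H1_grabber;
use base( 'XML::Handler::Subs' );

# Constructor: a blessed hash holding the arguments passed in.
sub new {
    my $type = shift;
    my $self = {@_};
    return bless( $self, $type );
}

# Start of document: call the base class, then print a heading.
sub start_document {
    SUPER::start_document( );
    print "Summary of file:\n";
}

# Start of an h1 element: print the opening bracket.
sub s_h1 {
    print "[";
}

# End of an h1 element: print the closing bracket.
sub e_h1 {
    print "]\n";
}

# Character data: print it only while inside an h1 element.
sub characters {
    my( $self, $props ) = @_;
    my $data = $props->{Data};
    print $data if( $self->in_element( h1 ));
}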
-
7/25/2019 Perl & XML
112/208
1 1 2 5 SAX
, :
The Life and Timesof Fooby
Fooby as a child
...
Fooby grows up
..,
Fooby is in big trouble!...
:Summary of f i le :[Fooby as a child][Fooby grows up][Fooby is in big trouble!] in_element() < >.
, L: : H andler: : Subs SAX.
XML::Handler::YAWriter XML: . 'Handler: :YAWri te r (MichaelK oehn) XML . , , , SAX.
Perl Ti : : *, , : ,
, SAX . , , , .
, , : ( ) XML , SAX . , XML , PerlSAX, . , , , XML , XML .
, $self >SUPER: : [_] .
-
7/25/2019 Perl & XML
113/208
XML: :SAX 113
, XML .
XML::SAX SAX . API, .
(Matt Sergeant), (Kip Hampton) (Robin Berjon) XML : : SAX . SAX 2, .
: API?. , SAX, . : Perl SAX. SAX Java, . , , , . Perl .
SAX , , . SAX 1, . SAX2. SAX2 , , . . ? ,, foo:bar? ?
perl xml , SAX, Perl ( , SAX2 API). , XML: : SAX , XML: : SAX: : ParserFactory. Factory , , . XML: : SAX: : ParserFactory , , , , . , .
XML: : SAX XML Perl. , .
. , .
-
7/25/2019 Perl & XML
114/208
114 5 SAX
, . , , . , . , , . , .
XML::SAX::ParserFactory , , XML: :5AX: :ParserFactory.
, DBI, .
, . , . , SAX , XML::SAX::MyHandler.
, , :
use XML::SAX::ParserFactory;
use XML::SAX::MyHandler;
my $handler = new XML::SAX::MyHandler;
my $parser = XML::SAX::ParserFactory->parser( Handler => $handler );
$parser->parse_uri( "foo.xml" );
, . ( , Requi redFeat ure s) . , . XML: : SAX SAX . , , , ParserFactory. , XML::SAX: :BobsParser ,
 , $XML::SAX::ParserPackage :
use XML::SAX::ParserFactory;
use XML::SAX::MyHandler;
my $handler = new XML::SAX::MyHandler;
$XML::SAX::ParserPackage = "XML::SAX::BobsParser( 1.24 )";
my $parser = XML::SAX::ParserFactory->parser( Handler => $handler );
 $XML::SAX::ParserPackage
XML: :SAX: :BobsParser(1.24) . ParserFactory requi re( ) ,
, new(). , 1.24, . , .
, XML: : SAX, parsers ():
-
7/25/2019 Perl & XML
115/208
XML: :SAX 115
use XML::SAX;
my @parsers = @{XML::SAX->parsers( )};
foreach my $p ( @parsers ) {
    print "\n", $p->{ Name }, "\n";
    foreach my $f ( sort keys %{$p->{ Features }} ) {
        print "$f => ", $p->{ Features }->{ $f }, "\n";
    }
}
, , . , XML:: 5 , :
XML::LibXML::SAX::Parser
http://xml.org/sax/features/namespaces => 1
XML::SAX::PurePerl
http://xml.org/sax/features/namespaces => 1
, 4eHHbixBXML: : SAX. XML:: Li bXML: :5AX: : Parse SAX API Hbxml2 6. Iibxml2 , , . , , ,
. , XML: :SAX: : PurePerl, , Perl. , , Perl. .
, , , . , , .
requ i re_f ea tu re ()
factory:my Sfactory = new XML::SAX::ParserFactory;$factory >require_feature( 'http://xml.org/sax/features/validation
1 );$factory >requi e_feature( 'http://xml.org/sax/features/namespaces' );my Sparser = $factory >parser( Handler => Shandler ; factory :my Sfactory = new XML::SAX::ParserFactory(
Required_features => {'http://xml.org/sax/features/validation' => 1'http://xml.