NAME Tag::Reader::Perl - Parse SGML/HTML/XML by each "tag". SYNOPSIS use Tag::Reader::Perl; my $obj = Tag::Reader::Perl->new; my @tokens = $obj->gettoken; $obj->set_file($file, $force); $obj->set_text($text, $force); METHODS "new()" Constructor. "gettoken()" Get parsed token. Returns structure defining parsed token in array context. See TOKEN STRUCTURE e.g. <xml> → ('<xml>', 'xml', 1, 1) Returns parsed token in scalar context. e.g. <xml> → '<xml>' "set_file($file[, $force])" Set file for parsing. If $force is present, reset the file for parsing even if a previous text or file exists. "set_text($text[, $force])" Set text for parsing. If $force is present, reset the text for parsing even if a previous text or file exists. TOKEN STRUCTURE Structure contains 4 fields in array: - parsed data - tag type - number of line - number of column in line Tag types are: - '[\w:]+' - element name. - '/[\w:]+' - end of element name. - '!data' - data - '![cdata[' - cdata - '!--' - comment - '?\w+' - instruction - '![\w+' - conditional - '!attlist' - DTD attlist - '!element' - DTD element - '!entity' - DTD entity - '!notation' - DTD notation ERRORS new(): From Class::Utils::set_params(): Unknown parameter '%s'. set_text(): Bad tag. Bad text. Cannot set new data if exists data. set_file(): Bad tag. Bad file. Cannot set new data if exists data. Cannot open file '%s'. EXAMPLE1 # Pragmas. use strict; use warnings; # Modules. use Encode qw(decode_utf8 encode_utf8); use Tag::Reader::Perl; # Object. my $obj = Tag::Reader::Perl->new; # Example data. my $sgml = <<'END'; <dokument> <adresa stát="cs"> <město> <ulice>Nová</ulice> <číslo>5</číslo> </adresa> </dokument> END # Set data to object. $obj->set_text(decode_utf8($sgml)); # Tokenize. 
while (my @tag = $obj->gettoken) { print "[\n"; print "\t[0]: '".encode_utf8($tag[0])."'\n"; print "\t[1]: ".encode_utf8($tag[1])."\n"; print "\t[2]: $tag[2]\n"; print "\t[3]: $tag[3]\n"; print "]\n"; } # Output: # [ # [0]: '<dokument>' # [1]: dokument # [2]: 1 # [3]: 1 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 1 # [3]: 11 # ] # [ # [0]: '<adresa stát="cs">' # [1]: adresa # [2]: 2 # [3]: 3 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 2 # [3]: 21 # ] # [ # [0]: '<město>' # [1]: město # [2]: 3 # [3]: 5 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 3 # [3]: 12 # ] # [ # [0]: '<ulice>' # [1]: ulice # [2]: 4 # [3]: 5 # ] # [ # [0]: 'Nová' # [1]: !data # [2]: 4 # [3]: 12 # ] # [ # [0]: '</ulice>' # [1]: /ulice # [2]: 4 # [3]: 16 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 4 # [3]: 24 # ] # [ # [0]: '<číslo>' # [1]: číslo # [2]: 5 # [3]: 5 # ] # [ # [0]: '5' # [1]: !data # [2]: 5 # [3]: 12 # ] # [ # [0]: '</číslo>' # [1]: /číslo # [2]: 5 # [3]: 13 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 5 # [3]: 21 # ] # [ # [0]: '</adresa>' # [1]: /adresa # [2]: 6 # [3]: 3 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 6 # [3]: 12 # ] # [ # [0]: '</dokument>' # [1]: /dokument # [2]: 7 # [3]: 1 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 7 # [3]: 12 # ] DEPENDENCIES Class::Utils, Error::Pure, Readonly. SEE ALSO Tag::Reader Parse SGML/HTML/XML by each "tag". HTML::TagReader Perl extension module for reading html/sgml/xml files by tags. AUTHOR Michal Špaček LICENSE AND COPYRIGHT © Michal Špaček 2005-2016 BSD 2-Clause License VERSION 0.01