-- This is used to pick up the lazytree.consume operator.
-- require "lazytree"
-- Module table; it is what this file returns at its end.
local Public = {}
-- The same table is also published under the global name xmliter.
xmliter = Public
--[[
xpairs(tree)
xpairs_c(tree)
xnpairs(tree)
xnpairs_c(tree)
Iterate over an XML tree.
xpairs(tree) returns an iterator over tree that returns each index and
its child. Example:
parent = lazytree.parsestring("<p>a<z>cdef</z>b</p>")
for i,x in xpairs(parent) do
if type(x) == "string" then
print("string:", x)
else
print("tag:", x.name)
end
end
prints:
string: a
tag: z
string: b
Note that it does not descend into child elements (as "cdef" was not
printed).
xnpairs(tree) ignores character data elements, and returns an index,
tree, and element name (which may be ignored):
for i,x in xnpairs(parent) do
print("tag:", x.name)
end
for i,x,name in xnpairs(parent) do
print("tag:", name)
end
either of which prints:
tag: z
Consuming iterators:
xpairs_c(tree) and xnpairs_c(tree) also iterate over the children of
tree, but they consume the children of tree as they process it. The
following two fragments have similar semantics:
for i,x in xpairs(parent) do
parent[i] = nil
do_something_with(x)
end
for i,x in xpairs_c(parent) do
do_something_with(x)
end
Using a consuming iterator means that you do not care about accessing
previously processed trees through parent. However, you can still
save them for later use:
for i,x,name in xnpairs_c(parent) do
if x.name == "xref" then
table.insert(references, x)
end
end
The primary reason to use consuming iterators is to reduce memory
usage. When using conventional XML trees, this may help a little if
you are building up another data structure while tearing down the XML
tree; parts of the tree you have already processed are eligible for
garbage collection, saving space for your new structure.
However, when using lazytree XML trees, memory usage can be vastly
smaller. Consider processing a large log file:
[....]
[....]
[...millions of elements later...]
[....]
With a conventional XML tree, processing this requires space linearly
proportional to the size of all the elements. With normal
iterators and a lazy tree, this requires space linearly proportional
to all previously processed elements (as future elements are
only read on demand.) With consuming iterators and a lazy tree,
processing only requires space proportional to the size of a single
element, as previously processed elements have been forgotten.
A secondary benefit to consuming iterators is that they may reduce CPU
usage a small amount. The Lua 5.0 garbage collector does not have to
work as hard during collections when less live data is present. (???
reread the GC algorithm to make sure this is true, have timing numbers
though.)
What is really going on here is that iterators provide an event-based
interface to tables. Consuming iterators provide many of the same
benefits as pure event-based XML parsers, while allowing you to
fluidly switch back to a tree-based API when that makes sense.
Usage hints:
It is always safe to replace a consuming iterator with a non-consuming
iterator; the only consequence may be memory exhaustion when
processing huge documents.
It makes the most sense to use a consuming iterator only as the last
step in processing a tree. Because of how lazy XML trees work, it is
not an error to touch child nodes before calling a consuming iterator.
When recursively processing elements, you should only call a consuming
iterator if you know your caller no longer cares about its contents.
A rule of thumb is to only call a consuming iterator inside another
consuming iterator.
]]
-- getn(tree): number of children stored in tree.
--
-- Returns tree.n when that field is set.  Otherwise the array part of
-- tree itself is measured: with table.getn where the interpreter
-- provides it, or by counting consecutive non-nil entries from index 1
-- where it does not.
--
-- (The previous fallback passed tree.n -- known to be nil on that
-- branch -- to table.getn, so it raised an error instead of measuring
-- the tree.)
local function getn(tree)
  if tree.n then
    return tree.n
  end
  if table.getn then
    return table.getn(tree)
  end
  -- No table.getn in this interpreter: count by hand.  The length
  -- operator is deliberately avoided so the file still parses on
  -- interpreters that predate it.
  local count = 0
  while tree[count + 1] ~= nil do
    count = count + 1
  end
  return count
end
-- Make getn available through the module table.
Public.getn = getn
-- Step function behind xpairs: looks at the slot after index i and
-- returns that index together with the child stored there.  Returns nil
-- as soon as the slot is empty (nil or false), which ends the loop.
local function xnext(lz, i)
  local idx = i + 1
  local child = lz[idx]
  if child then
    return idx, child
  end
  return nil
end

-- xpairs(lz): generic-for iterator over the direct children of lz,
-- yielding (index, child) for indices 1, 2, ... without modifying lz.
-- Raises an error when lz is not a table.
function xpairs(lz)
  if type(lz) == "table" then
    return xnext, lz, 0
  end
  error("argument to xpairs must be a table")
end
-- Step function behind xpairs_c: like a plain index walk, but the slot
-- that was just read is cleared in lz before the child is handed out,
-- so lz no longer references it.  The slot is cleared even when it
-- turned out to be empty.  Returns nil once an empty slot is reached.
local function xnext_c(lz, i)
  local idx = i + 1
  local child = lz[idx]
  lz[idx] = nil
  if child then
    return idx, child
  end
  return nil
end

-- xpairs_c(lz): consuming iterator over the direct children of lz,
-- yielding (index, child) and removing each child from lz as it goes.
-- Marks lz with _consumed = true before iteration starts.
-- Raises an error when lz is not a table.
function xpairs_c(lz)
  if type(lz) == "table" then
    lz._consumed = true
    return xnext_c, lz, 0
  end
  error("argument to xpairs_c must be a table")
end
-- Step function behind xnpairs: starting after index i, skips every
-- child that is not a table (character data) and returns the index,
-- the child and the child's name field for the first table found.
-- Returns nil once an empty slot is reached.
local function xnnext(lz, i)
  local idx = i
  local child
  repeat
    idx = idx + 1
    child = lz[idx]
  until not child or type(child) == "table"
  if not child then
    return nil
  end
  return idx, child, child.name
end

-- xnpairs(lz): generic-for iterator over the element (table) children
-- of lz, yielding (index, element, element.name) and leaving lz
-- untouched.  Raises an error when lz is not a table.
function xnpairs(lz)
  if type(lz) == "table" then
    return xnnext, lz, 0
  end
  error("argument to xnpairs must be a table")
end
-- Step function behind xnpairs_c: starting after index i, walks the
-- children of lz, clearing every occupied slot it passes.  Non-table
-- children (character data) are dropped silently; the first table
-- child is returned as (index, element, element.name).  An empty slot
-- ends the walk with nil and is not written to.
local function xnnext_c(lz, i)
  local idx = i + 1
  local child = lz[idx]
  while child do
    -- Whatever sits here is being consumed, element or not.
    lz[idx] = nil
    if type(child) == "table" then
      return idx, child, child.name
    end
    idx = idx + 1
    child = lz[idx]
  end
  return nil
end

-- xnpairs_c(lz): consuming iterator over the element (table) children
-- of lz, yielding (index, element, element.name) and removing every
-- visited child from lz.  Marks lz with _consumed = true first.
-- Raises an error when lz is not a table.
function xnpairs_c(lz)
  if type(lz) == "table" then
    lz._consumed = true
    return xnnext_c, lz, 0
  end
  error("argument to xnpairs_c must be a table")
end
-- Step function behind xattrpairs: advances through attr with next(),
-- skipping every key that is not a string, and returns the first
-- string key with its value.  Returns nil when next() is exhausted (or
-- hands back a falsy key).
local function xattrnext(attr, k)
  local key, value = next(attr, k)
  while key and type(key) ~= "string" do
    key, value = next(attr, key)
  end
  if not key then
    return nil
  end
  return key, value
end

-- xattrpairs(lz): generic-for iterator over the string-keyed entries of
-- lz.attr, yielding (key, value) in next() order.  An element without
-- an attr field iterates over nothing.  Non-string keys are skipped --
-- presumably the positional attribute-name entries some parsers store
-- alongside the name/value pairs; confirm against the tree producer.
-- Raises an error when lz is not a table.
function xattrpairs(lz)
  if type(lz) == "table" then
    return xattrnext, lz.attr or {}, nil
  end
  error("argument to xattrpairs must be a table")
end
-- switch_internal(lz, ftable, parent, iterator, opts, consume)
--
-- Walks the children of lz with iterator(lz) and dispatches each one to
-- a handler looked up in ftable:
--   ftable[0]      called as f(lz, parent) before any child is visited
--   ftable[""]     called as f(str, lz) for every string child
--   ftable[name]   handler for table children whose name field is name
--   ftable[true]   handler for table children with no specific entry
--   ftable[-1]     called as f(lz, parent) after the last child; its
--                  results become the results of this function
-- A function handler for a table child is called as f(elt, lz).  A
-- table handler is used as the ftable of a nested walk over that child,
-- with lz as the child's parent and the same iterator, opts and consume.
--
-- Every handler may return (escape, val).  The first truthy escape
-- stops the walk at once and is returned together with val.
--
-- opts fields read here:
--   no_chardata    raise an error for a string child with no "" handler
--   no_tags        raise an error for a table child with no handler
-- consume, when given, is called with each table child that has no
-- handler (and no_tags is not set).
--
-- Fixes relative to the earlier version:
--   * the no_chardata error names the enclosing element (lz) instead of
--     indexing the string child, which never yielded the message;
--   * consume is forwarded to nested walks, so unhandled children are
--     treated the same at every depth;
--   * names are passed through tostring so a missing name field cannot
--     mask the intended error with a concatenation failure.
local function switch_internal(lz, ftable, parent, iterator, opts, consume)
  local before = ftable[0]
  if before then
    local escape, val = before(lz, parent)
    if escape then
      return escape, val
    end
  end

  for _, elt in iterator(lz) do
    if type(elt) == "string" then
      local strhandler = ftable[""]
      if strhandler then
        local escape, val = strhandler(elt, lz)
        if escape then
          return escape, val
        end
      elseif opts.no_chardata then
        -- elt is the text itself; the element that contains it is lz.
        error("found unexpected character data in "..tostring(lz.name))
      end
    else
      local f = ftable[elt.name] or ftable[true]
      if f then
        local escape, val
        if type(f) == "table" then
          -- Nested handler table: recurse with the same settings.
          escape, val = switch_internal(elt, f, lz, iterator, opts, consume)
        else
          escape, val = f(elt, lz)
        end
        if escape then
          return escape, val
        end
      elseif opts.no_tags then
        -- parent is the parent of lz as supplied by the caller, so the
        -- suffix is absent for a top-level call without opts.parent.
        local parentstr = ""
        if parent then
          parentstr = " in parent "..tostring(parent.name)
        end
        error("unexpected element "..tostring(elt.name)..parentstr)
      elseif consume then
        consume(elt)
      end
    end
  end

  local after = ftable[-1]
  if after then
    return after(lz, parent)
  end
end
-- Shared stand-in used when a caller passes no options table.
local emptyopts = {}

-- switch_c(lz, ftable, opts): dispatch the children of lz to the
-- handlers in ftable using the consuming iterator xpairs_c, so children
-- are removed from lz as they are visited.  opts is optional; its
-- parent field, if any, is passed on as the parent of lz.  When a
-- global lazytree with a consume field is present, that function is
-- handed to the dispatcher for children that have no handler.
-- Returns whatever switch_internal returns.
local function switch_c(lz, ftable, opts)
  local consume = lazytree and lazytree.consume
  if not consume then
    consume = nil
  end
  local options = opts or emptyopts
  return switch_internal(lz, ftable, options.parent, xpairs_c, options, consume)
end
Public.switch_c = switch_c
-- switch(lz, ftable, opts): dispatch the children of lz to the handlers
-- in ftable using the non-consuming iterator xpairs, leaving lz intact.
-- opts is optional; its parent field, if any, is passed on as the
-- parent of lz.  No consume callback is supplied.
-- Returns whatever switch_internal returns.
local function switch(lz, ftable, opts)
  local options = opts or emptyopts
  return switch_internal(lz, ftable, options.parent, xpairs, options, nil)
end
Public.switch = switch
return Public