#!/usr/bin/perl
# $Id: xgrep,v 1.4 2010/10/26 16:11:18 thk Exp $
# Copyright 2010 Timo Korvola
#
# xgrep - evaluate an XPath expression against one or more XML documents
# and print the matches, or (with -v) print each document with the
# matching nodes removed.  Documents are read from the files named on the
# command line, or from standard input when no file is given.

use warnings;
use strict;
use Getopt::Std;
use XML::LibXML;

# Print the usage text on standard error and terminate with a failure
# status.  The name is significant: Getopt::Std looks for
# main::HELP_MESSAGE when it handles --help.
sub HELP_MESSAGE {
    die <<"EOF";
Usage: $0 [-hv] [-n prefix] xpath-expr [file.xml ...]

Evaluate xpath-expr in the root node context of each XML document and
print the result. If the result is a node set, each node is preceded
by an XML comment that indicates the file name, line number and node
type. Other types of results are preceded by the file name and result
type. These annotations can be disabled with -h. Regardless of the -h
option, a newline is output after each result. If the result is a node
set, each node is followed by a newline.

If -v is given, xpath-expr must evaluate to a node set. Each XML
document is printed to standard output with the matching nodes removed.
Note that because the document is marshalled to DOM and re-serialized,
insignificant textual changes such as replacing "<a></a>" with "<a/>"
may also appear.

Namespace prefixes defined in the document root element can be used in
xpath-expr. In addition, the -n option binds prefix to the default
namespace of the document root element. Unprefixed names in xpath-expr
always refer to the null namespace, not to the default namespace. If
there is no default namespace definition in the root element, the -n
option has no effect.
EOF
}

# Command-line switches: -h (no annotations), -v (invert: print the
# document minus the matches), -n prefix (bind prefix to the root
# element's default namespace).
my %options;
$Getopt::Std::STANDARD_HELP_VERSION = 1;
HELP_MESSAGE() unless getopts('hvn:', \%options) && @ARGV >= 1;
# -h only affects the annotated listing, which -v never produces.
HELP_MESSAGE() if $options{h} && $options{v};

# The first non-option argument is the XPath expression; whatever is left
# in @ARGV afterwards is the list of input files.
my $expr = shift @ARGV;

# Line numbers are requested from the parser so that node annotations can
# report where each match was found.
my $parser = XML::LibXML->new(line_numbers => 1);

# non_matching($doc, $result)
#
# Implements -v: detach every node in $result from $doc and print the
# remaining document to standard output.
#   $doc    - the parsed document
#   $result - the value returned by evaluating the XPath expression
# Dies if $result is not an XML::LibXML::NodeList, since only nodes can
# be removed from a document.
sub non_matching($$) {
    my ($doc, $result) = @_;
    my $rtype = ref $result;
    die "Invalid result type $rtype for -v\n"
        unless $rtype eq "XML::LibXML::NodeList";
    $_->unbindNode for $result->get_nodelist;
    print $doc->serialize;
}

# xpath($doc, $fname)
#
# Evaluate the global $expr with the root element of $doc as the context
# node and print the outcome according to the global %options.
#   $doc   - the parsed document
#   $fname - name used for the document in annotations
sub xpath($$) {
    my ($doc, $fname) = @_;
    my $root  = $doc->documentElement;
    my $xpath = XML::LibXML::XPathContext->new($root);

    # -n: make the root element's default namespace reachable from the
    # expression under the user-supplied prefix.  The default namespace
    # declaration is the one whose node name is plain "xmlns".
    if ($options{n}) {
        foreach my $ns ($root->getNamespaces) {
            if ($ns->name eq "xmlns") {
                $xpath->registerNs($options{n}, $ns->value);
                last;
            }
        }
    }

    my $result = $xpath->find($expr);

    if ($options{v}) {
        non_matching($doc, $result);
        return;
    }

    my $rtype = ref $result;
    if ($rtype eq "XML::LibXML::NodeList") {
        for my $node ($result->get_nodelist) {
            unless ($options{h}) {
                # Annotate each node with its origin as an XML comment,
                # as promised by the usage text: file name, line number
                # and node type (class name without the package prefix).
                (my $type = ref $node) =~ s/XML::LibXML:://;
                print "<!-- $fname:", $node->line_number, " ($type) -->\n";
            }
            print $node->serialize, "\n";
        }
    }
    else {
        # Boolean, number or string result: print it on one line,
        # prefixed by the file name and the result type unless -h.
        $rtype =~ s/XML::LibXML:://;
        print "$fname ($rtype): " unless $options{h};
        print "$result\n";
    }
}

# Process every file named on the command line, or standard input when
# there are none.
if (@ARGV) {
    for my $file (@ARGV) {
        xpath($parser->parse_file($file), $file);
    }
}
else {
    xpath($parser->parse_fh(*STDIN{IO}), "(stdin)");
}