## package RefDB::SRU
## RefDB SRU support module. Use this to implement SRU services for RefDB
## databases

## markus@mhoenicka.de 2007-02-07

##   This program is free software; you can redistribute it and/or modify
##   it under the terms of the GNU General Public License as published by
##   the Free Software Foundation; either version 2 of the License, or
##   (at your option) any later version.
##   
##   This program is distributed in the hope that it will be useful,
##   but WITHOUT ANY WARRANTY; without even the implied warranty of
##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##   GNU General Public License for more details.
   
##   You should have received a copy of the GNU General Public License
##   along with this program; if not, write to the Free Software
##   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

## Package main documentation

=head1 NAME

RefDB::SRU - Module for a SRU service for RefDB

=head1 SYNOPSIS

  use RefDB::SRU;

  my $sru_query = new RefDB::SRU(\%params);

  print $sru_query->result();



=head1 DESCRIPTION

Implements the backend code of a SRU service to query RefDB databases. Your own code must provide the networking interface which invokes the code in this module


=head1 FEEDBACK

Send bug reports, questions, and comments to the refdb-users mailing list at:

refdb-users@lists.sourceforge.net

For list information and archives, please visit:

http://lists.sourceforge.net/lists/listinfo/refdb-users


=head1 AUTHOR

Markus Hoenicka, markus@mhoenicka.de

=head1 SEE ALSO

This module is part of the RefDB package, a reference manager and bibliography tool for markup languages. Please visit http://refdb.sourceforge.net for further information.

=cut

######################################################################
######################################################################
## defines a class to deal with SRU diagnostics
package Diagnostics;

######################################################################
## new
######################################################################
=head2 new

Title   : new

Usage   : $diag = Diagnostics::new($number, $detail)

Function: creates a new Diagnostics object

Parameter: $number: the SRU diagnostic number 

Parameter: $detail: the SRU diagnostic detail
=cut

sub new() {
    my ($class, $error_number, $detail)  = @_;
    my $self = {};
    
    # store the error number and the detail
    $self->{number} = $error_number;
    $self->{detail} = $detail;

    # the message is not stored in any way, as it can be generated
    # from the above conveniently whenever needed

    bless $self, $class;
    return $self;
}

######################################################################
## diag_uri
######################################################################
=head2 diag_uri

Title   : diag_uri

Usage   : $uri = $diag->diag_uri()

Function: returns the URI associated with the diagnostic

=cut

sub diag_uri () {
    my $self = shift;

    return "info:srw/diagnostic/1/" . $self->{number};
}

######################################################################
## diag_detail
######################################################################
=head2 diag_detail

Title   : diag_detail

Usage   : $detail = $diag->diag_detail()

Function: returns the detail associated with the diagnostic

=cut

sub diag_detail () {
    my $self = shift;

    return $self->{detail};
}

######################################################################
## diag_message
######################################################################
=head2 diag_message

Title   : diag_message

Usage   : $message = $diag->diag_message()

Function: returns the message associated with the diagnostic

=cut

sub diag_message () {
    my $self = shift;

    if ($self->{number} == 1) {
	"system error related to '" . $self->{detail};
    }
    elsif ($self->{number} == 4) {
	return "operation '" . $self->{detail} . "' is not supported";
    }
    elsif ($self->{number} == 7) {
	return "mandatory parameter '" . $self->{detail} . "' was not supplied";
    }
    elsif ($self->{number} == 10) {
	"query syntax error";
    }
    elsif ($self->{number} == 19) {
	return "relation '" . $self->{detail} . "' is not supported";
    }
    elsif ($self->{number} == 20) {
	return "relation modifiers are not supported";
    }
    elsif ($self->{number} == 39) {
	return "proximity queries are not supported";
    }
    elsif ($self->{number} == 66) {
	return "schema '" . $self->{detail} . "' is not supported";
    }
    elsif ($self->{number} == 71) {
	return "record packing '" . $self->{detail} . "' is not supported";
    }
    elsif ($self->{number} == 72) {
	return "recordXPath retrieval not supported";
    }
}


######################################################################
######################################################################
## defines a class to walk the CQL parse tree
package MyVisitor;
use base qw( CQL::Visitor );

######################################################################
## visit: walks the CQL parse tree
######################################################################
=head2 visit

Title   : visit

Usage   : called by CQL parser at each node

Function: processes each node

=cut

# we walk the parse tree and translate each subexpression into
# an equivalent RefDB query string. Parentheses keep the logic intact
# the query string is assembled in the refdb_query member of the MyVisitor
# object
sub visit {
    my ($self,$node) = @_;
    if ( $node->isa( 'CQL::BooleanNode' ) ) { 
	$self->boolean($node);
    }
    elsif ( $node->isa( 'CQL::TermNode' ) ) {
        $self->term( $node );
    }
    elsif ( $node->isa( 'CQL::ProxNode' ) ) {
	    $self->{unsupported} = "t";
    }
    elsif ( $node->isa( 'CQL::PrefixNode' ) ) {
	    $self->{unsupported} = "t";
    }
}

######################################################################
## boolean: called whenever visit() hits a boolean node
######################################################################
=head2 boolean

Title   : boolean

Usage   : called by visit

Function: translates boolean notes into the RefDB syntax 

=cut

sub boolean {
    my ($self,$node) = @_;

    my $diag;

    $self->{refdb_query} .= "(";
    $self->visit( $node->left() );
    if ($node->op() eq "not") {
	$self->{refdb_query} .= " AND NOT ";
    }
    elsif ($node->op() eq "prox") {
	$diag = Diagnostics->new(39, "prox");
	push (@{$self->{diagnostics}}, $diag);
	# pretend we support proximity queries
	$self->{refdb_query} .= " AND ";
    }
    else {
	$self->{refdb_query} .= " " . uc($node->op()) . " ";
    }
    $self->visit( $node->right() );
    $self->{refdb_query} .= ")";
}

######################################################################
## term: called by visit whenever a terminal node is hit
######################################################################
=head2 term

Title   : term

Usage   : called by visit()

Function: translates qualifier and terms into RefDB syntax

=cut

# called whenever the parser hits a terminal node
sub term {
    my ($self,$node) = @_;
    
    # transcribe qualifiers to RefDB syntax (bib context set)
    $node->{qualifier} =~ s/bib.title/:TX:/;
    $node->{qualifier} =~ s/bib.seriesTitle/:T3:/;
    $node->{qualifier} =~ s/bib.titleAbbrev/:JA:/;
    $node->{qualifier} =~ s/bib.name/:AX:/;
    $node->{qualifier} =~ s/bib.namePersonal/:AX:/;
    $node->{qualifier} =~ s/bib.nameCorporate/:AX:/;
    $node->{qualifier} =~ s/bib.subject/:KW:/;
    $node->{qualifier} =~ s/bib.dateIssued/:PY:/;
    $node->{qualifier} =~ s/bib.volume/:VL:/;
    $node->{qualifier} =~ s/bib.issue/:IS:/;
    $node->{qualifier} =~ s/bib.startPage/:SP:/;
    $node->{qualifier} =~ s/bib.endPage/:EP:/;

    # transcribe qualifiers to RefDB syntax (dc context set)
    $node->{qualifier} =~ s/dc.identifier/:CK:/;
    $node->{qualifier} =~ s/dc.title/:TX:/;
    $node->{qualifier} =~ s/dc.subject/:KW:/;
    $node->{qualifier} =~ s/dc.coverage/:KW:/;
    $node->{qualifier} =~ s/dc.creator/:AX:/;
    $node->{qualifier} =~ s/dc.publisher/:PB:/;
    $node->{qualifier} =~ s/dc.contributor/:AX:/;
    $node->{qualifier} =~ s/dc.date/:PY:/;

    # transcribe qualifiers to RefDB syntax (no context set)
    $node->{qualifier} =~ s/title/:TX:/;
    $node->{qualifier} =~ s/seriesTitle/:T3:/;
    $node->{qualifier} =~ s/titleAbbrev/:JA:/;
    $node->{qualifier} =~ s/name/:AX:/;
    $node->{qualifier} =~ s/namePersonal/:AX:/;
    $node->{qualifier} =~ s/nameCorporate/:AX:/;
    $node->{qualifier} =~ s/dateIssued/:PY:/;
    $node->{qualifier} =~ s/volume/:VL:/;
    $node->{qualifier} =~ s/issue/:IS:/;
    $node->{qualifier} =~ s/startPage/:SP:/;
    $node->{qualifier} =~ s/endPage/:EP:/;
    $node->{qualifier} =~ s/identifier/:CK:/;
    $node->{qualifier} =~ s/subject/:KW:/;
    $node->{qualifier} =~ s/coverage/:KW:/;
    $node->{qualifier} =~ s/creator/:AX:/;
    $node->{qualifier} =~ s/publisher/:PB:/;
    $node->{qualifier} =~ s/contributor/:AX:/;
    $node->{qualifier} =~ s/date/:PY:/;

    my $qualifier = $self->_maybeQuote($node->getQualifier());
    my $term = $self->_maybeQuote( $node->getTerm() );
    $term = $self->_translate_regex($term);
    my $relation = $node->getRelation();
    my $base = $relation->getBase();
    
    if ($base eq "=") {
	$base = "~";
    }

    if ($relation->getModifiers() > 0) {
	# modifiers are currently unsupported
	$diag = Diagnostics->new(20, "modifiers");
	push (@{$self->{diagnostics}}, $diag);
    }

    # expand "all" to an ANDed list, and "any" to an ORed list
    if ( $qualifier
	 and $qualifier !~ /srw\.serverChoice/i
	 and $qualifier !~ /srw\.anywhere/i) { 
	if (uc($base) eq "ALL" || uc($base) eq "ANY") {
	    # todo: make insensitive to whitespace
	    if ($term =~ /^\".*\"$/) {
		$term =~ s/^\"(.*)\"$/$1/;
	    }

	    my @terms = split / /, $term;
		
	    if (uc($base) eq "ALL") {
		$self->{refdb_query} .= "($qualifier~"  . join(" AND $qualifier~", @terms) . ")";
	    }
	    else {
		$self->{refdb_query} .= "($qualifier~"  . join(" OR $qualifier~", @terms) . ")";
	    }
	}
	elsif (uc($base) eq "EXACT") {
	    $self->{refdb_query} .= "$qualifier=$term";
	}
	elsif (uc($base) eq "WITHIN") {
	    # todo: move stripping quote into sub
	    if ($term =~ /^\".*\"$/) {
		$term =~ s/^\"(.*)\"$/$1/;
	    }
	    my @terms = split / /, $term;
	    my $lower = $terms[0];
	    my $upper = $terms[1];
	    $self->{refdb_query} .= "$qualifier>=$lower AND $qualifier<=$upper";
	}
	elsif (uc($base) eq "ENCLOSES") {
	    $diag = Diagnostics->new(20, "encloses");
	    push (@{$self->{diagnostics}}, $diag);
	}
	else {
	    $self->{refdb_query} .= $qualifier .  $base . $term;
	}
    } 
    else {
	# if no or "anywhere" qualifier is given, we query titles, authors,
	# and keywords
	$self->{refdb_query} .= ":TX:~$term OR :AX:~$term OR :KW:~$term" ;
    }
}

######################################################################
## _translate_regex: translates a regular expression from CQL to db
######################################################################
=head2 _translate_regex

Title   : _translage_regex

Usage   : _translate_regex($string)

Function: translates regular expressions from the CQL syntax to the database engine syntax

=cut

sub _translate_regex {
    my ($self, $str) = @_;

    return if ! defined $str;

    if ($self->{'db_engine'} eq "mysql"
	|| $self->{'db_engine'} eq "pgsql") {
	# use Unix regexp

	# replace '*' with '.*'
	$str =~ s/([^\\]*)\*/$1.*/g;

	# replace '?' with '.'
	$str =~ s/([^\\]*)\?/$1./g;
	
	# replace a trailing '^' anchor with '$'
	$str =~ s/(\b.*\b)\^/$1\$/g;
    }
    else {
	# use SQL regexp

	# replace '*' with '%'
	$str =~ s/([^\\]*)\*/$1%/g;

	# replace '?' with '_'
	$str =~ s/([^\\]*)\?/$1_/g;

	# anchors are not supported
	$str =~ s/\^//g;
    }

    return $str;
}

######################################################################
## maybeQuote: quotes strings for inclusion in a CQL query
######################################################################
=head2 _maybeQuote

Title   : _maybeQuote

Usage   : _maybeQuote($string)

Function: quotes particular characters

=cut

sub _maybeQuote {
    my ($self, $str) = @_;

    return if ! defined $str;
    if ( $str =~ m|[" \t=<>/()]| ) { 
        $str =~ s/"/\\"/g;
        $str = qq("$str");
    }
    return $str;
}

#"

######################################################################
######################################################################
## defines the main class

package RefDB::SRU;

## need this for logging
use RefDB::Log;

## the refdb client module
use RefDB::Client;

## the parser for the query string
use CQL::Parser;

## used to output all XML
use XML::Writer;

use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

$VERSION = "0.7";

=head1 package functions

=head2 new

Title   : new

Usage   : $my_query->new(\%params);

Function: creates and initializes a new query object

Arguments: reference to a hash containing the query parameter-value pairs

Returns : new object

=cut

sub new($) {
    my ($class, $paramref)  = @_;
    my $self = {};
    
    # store the reference to the parameter-value hash
    $self->{params} = $paramref;

    # this string will receive the refdb data
    $self->{refdb_result} = undef;

    # this string will receive the response
    $self->{response} = undef;

    # this array will hold non-surrogate diagnostic objects
    @{$self->{diagnostics}} = ();

    # this is the counter for retrieved references. Initialize
    # to zero instead of leaving it undefined
    $self->{num_references} = 0;

    # same with total number of references
    $self->{total_num_references} = 0;

    # namespace declarations. We'll put them here instead of scattering
    # them in all functions that use namespaces
    my $srw = "http://www.loc.gov/zing/srw/";
    my $diag = "http://www.loc.gov/zing/srw/diagnostic/";
    my $xcql = "http://www.loc.gov/zing/cql/xcql/";
    my $zeerex = "http://explain.z3950.org/dtd/2.1/";
    my $risx = "http://refdb.sourceforge.net/ns/risx/";
    my $mods = "http://www.loc.gov/mods/v3";

    # namespace hashes; both prefix_map and rpm (reverse_prefix_map)
    # are hash references. The latter merely adds the convenience of
    # being able to address each URL with the shorter prefix.
    $self->{prefix_map} = { $srw => 'srw',
			    $diag => 'diag',
			    $xcql => 'xcql',
			    $zeerex => 'zeerex',
			    $risx => 'risx',
			    $mods => 'mods'};

    my %reverse_prefix_map = reverse %{$self->{prefix_map}};
    $self->{rpm} = \%reverse_prefix_map;

    # record schemas
    my $risxdtd = "http://refdb.sourceforge.net/dtd/risx/risx.dtd";
    my $modsschema = "info:srw/schema/1/mods-v3.0";

    # schema hash (somewhat lame with two entries. more to come?)
    $self->{schema_map} = { $risxdtd => 'risx',
			    $modsschema => 'mods'};

    my %reverse_schema_map = reverse %{$self->{schema_map}};
    $self->{rsm} = \%reverse_schema_map;

    # logging options
    $self->{params}->{'logfile'} = (defined($self->{params}->{'logfile'})) ? $self->{params}->{'logfile'} : "/var/log/refdbsru.log";
    $self->{params}->{'loglevel'} = (defined($self->{params}->{'loglevel'})) ? $self->{params}->{'loglevel'} : 6;
    $self->{params}->{'logdest'} = (defined($self->{params}->{'logdest'})) ? $self->{params}->{'logdest'} : 2; ## 0 = stderr, 1 = syslog, 2 = file

    # post-process the logging parameters
    $self->{params}->{'logdest'} = RefDB::Log::num_logdest($self->{params}->{'logdest'});
    $self->{params}->{'loglevel'} = RefDB::Log::num_loglevel($self->{params}->{'loglevel'});

    # set up logging
    $self->{params}->{'log'} = RefDB::Log::->new($self->{params}->{'logdest'}, $self->{params}->{'loglevel'}, $self->{params}->{'logfile'}, "refdbsru");

    bless $self, $class;
    return $self;
}

######################################################################
## print_vars: displays the parameter/value pairs for debugging
######################################################################
=head2 print_vars

Title   : print_vars

Usage   : $my_query->print_vars;

Function: displays the parameter/value pairs for debugging purposes

=cut

sub print_vars() {
    my $self = shift;

    while ((my $key, my $value) = each %{$self->{params}}) {
	print $key . ": " . $value . "\n";
    }
}

######################################################################
## run: analyzes the parameter values and runs an appropriate operation
######################################################################
=head2 run

Title   : run

Usage   : $my_query->run;

Function: analyzes the parameter values and runs an appropriate operation

=cut

sub run() {
    my $self = shift;

    # see which operation we are supposed to perform
#    $self->print_vars();
    # an explain operation is requested if there is either no
    # parameter-value pair, no operation parameter, or if the
    # explain operation is explicitly requested

    if (keys(%{$self->{params}}) == 0
	|| !defined($self->{params}->{'operation'})
	|| $self->{params}->{'operation'} eq "explain") {
	$self->_check_params("explain");
	$self->_explain();
    }
    elsif ($self->{params}->{'operation'} eq "searchRetrieve"
	   || $self->{params}->{'operation'} eq "analyzeQuery") {
	# check for missing mandatory parameters
	$self->_check_params("searchRetrieve");
	$self->_searchRetrieve($self->{params}->{'operation'});
    }
    elsif ($self->{params}->{'operation'} eq "scan") {
	$self->_check_params("scan");
	$self->_scan();
    }
    else {
	# the SRU/SRW docs are silent about what should happen if an
	# invalid operation is requested. There is a diagnostic code for
	# this case, but I don't know what the XML output is supposed to
	# look like. Therefore, we just treat this like an explain request.
	$self->_explain();
    }

}

######################################################################
## response: accessor for the query response string
######################################################################
=head2 response

Title   : response

Usage   : $query->response()

Function: accessor for the query response string

=cut

sub response() {
    my $self = shift;

    $self->{response};
}

######################################################################
## _check_params: check the SRU parameters
######################################################################
#=head2 _check_params

#Title   : _check_params

#Usage   : $self->_check_params($operation)

#Function: checks the parameters of the SRU request

#Parameter: $operation either "searchRetrieve" or "scan"

#=cut

sub _check_params() {
    my ($self, $operation) = @_;
    my $diag;

    # mandatory parameters for all operations
    if (!defined($self->{params}->{'version'})
	&& $operation ne "explain") {
	$diag = Diagnostics->new(7, "version");
	push (@{$self->{diagnostics}}, $diag);
    }

    # not mandatory, but if given, only three operations are allowed
    # (no operation parameter counts as 'explain')
    if (defined($self->{params}->{'operation'})
	&& $self->{params}->{'operation'} ne "searchRetrieve"
	&& $self->{params}->{'operation'} ne "scan"
	&& $self->{params}->{'operation'} ne "explain") {
	$diag = Diagnostics->new(4, $operation);
	push (@{$self->{diagnostics}}, $diag);
    }

    # if there is no db_engine parameter, query the refdbd server and
    # see what it should be
    if (!defined($self->{params}->{'db_engine'})
	&& defined($self->{params}->{'operation'})
	&& ($self->{params}->{'operation'} eq "searchRetrieve"
	    || $self->{params}->{'operation'} eq "scan")) {
	my $client = new RefDB::Client;

	$client->set_conninfo($self->{params}->{'server_ip'}, $self->{params}->{'port'}, $self->{params}->{'username'}, $self->{params}->{'password'}, $self->{params}->{'database'}, $self->{params}->{'pdfroot'}, $self->{params}->{'css_url'}, $self->{params}->{'timeout'});

	# save error status and message in case we need it for diagnostics
	my $summary = $client->refdb_whichdb();
	my $result = $client->get_data();

	$result =~ s/.*Database server: ([^\n]+)\n.*/$1/s;
	$self->{params}->{'db_engine'} = $result;
    }

    # if no stylesheet is specified in the query, use our default, if any
    if (!defined($self->{params}->{'stylesheet'})) {
	if (defined($self->{params}->{'xsl_url'})
	    && length($self->{params}->{'xsl_url'}) > 0) {
	    $self->{params}->{'stylesheet'} = $self->{params}->{'xsl_url'};
	}
    }
 
    if ($operation eq "searchRetrieve") {
	# mandatory parameters
	if (!defined($self->{params}->{'query'})) {
	    $diag = Diagnostics->new(7, "query");
	    push (@{$self->{diagnostics}}, $diag);
	}

	# check values of optional parameters
	if (defined($self->{params}->{'recordPacking'})
	    && $self->{params}->{'recordPacking'} ne "xml") {
	    $diag = Diagnostics->new(71, $self->{params}->{'recordPacking'});
	    push (@{$self->{diagnostics}}, $diag);
	}
	else {
	    $self->{params}->{'recordPacking'} = "xml";
	}

	if (defined($self->{params}->{'recordSchema'})) {
	    if (!defined(${$self->{schema_map}}{$self->{params}->{'recordSchema'}})
		&& !defined(${$self->{rsm}}{$self->{params}->{'recordSchema'}})) {
		$diag = Diagnostics->new(66, $self->{params}->{'recordSchema'});
		push (@{$self->{diagnostics}}, $diag);
	    }
	}
	else {
	    # use MODS as default schema
	    $self->{params}->{'recordSchema'} = "mods";
	}

	if (defined($self->{params}->{'recordXPath'})) {
	    $diag = Diagnostics->new(72, "XPath");
	    push (@{$self->{diagnostics}}, $diag);
	}
    }
    elsif ($operation eq "scan") {
	# mandatory parameters
	if (!defined($self->{params}->{'scanClause'})) {
	    $diag = Diagnostics->new(7, "scanClause");
	    push (@{$self->{diagnostics}}, $diag);
	}
    }

    # return the number of diagnostics. Should be zero if all is fine
    return scalar @{$self->{diagnostics}};
}


######################################################################
## _explain: performs the explain operation
######################################################################
#=head2 _explain

#Title   : _explain

#Usage   : $self->explain;

#Function: performs the explain operation

#=cut

sub _explain() {
    my $self = shift;

    $self->{params}->{'log'}->log_print("debug", "explain");
    if (@{$self->{diagnostics}} == 0) {
	$self->_run_refdb_info();
    }
}

######################################################################
## _searchRetrieve: performs the searchRetrieve operation
######################################################################
#=head2 _searchRetrieve

#Title   : _searchRetrieve

#Usage   : $self->searchRetrieve();

#Argument: $operation: either "searchRetrieve" or "analyzeQuery"

#Function: performs the searchRetrieve operation

#=cut

sub _searchRetrieve() {
    my ($self, $operation) = @_;
   
    my $refdb_query;

    if (@{$self->{diagnostics}} == 0) {
	$self->{params}->{'log'}->log_print("debug", "searchRetrieve");

	my $parser = CQL::Parser->new();
	# uncomment to receive debug output on stderr
        #    $CQL::DEBUG = 1;

	# keep a copy of the original query
	my $cql = $self->{params}->{'query'};
	$self->{params}->{'log'}->log_print("debug", $cql);

	my $root = $parser->parse($self->{params}->{'query'});
	my $visitor = new MyVisitor;

	# pass the database engine to the visitor object to adapt the
	# regular expressions appropriately
	$visitor->{'db_engine'} = $self->{params}->{'db_engine'};

	# parse and translate the query
	$visitor->visit($root);

	# transfer visitor diagnostics to query diagnostics
	if (defined(@{$visitor->{diagnostics}})) {
	    push (@{$self->{diagnostics}}, @{$visitor->{diagnostics}});
	}

	$refdb_query = $visitor->{'refdb_query'};
    }

#    if ($operation eq "searchRetrieve") {
	$self->_run_refdb_query($refdb_query);
#    }
#    else {
	# todo: send back an XML-wrapped input and output query string
#	$self->_wrap_query_strings($cql, $refdb_query);
#    }
}

######################################################################
## _scan: performs the scan operation
######################################################################
#=head2 _scan

#Title   : _scan

#Usage   : $self->scan();

#Function: performs the scan operation

#=cut

sub _scan() {
    my ($self, $operation) = @_;
   
    my $refdb_query;

    if (@{$self->{diagnostics}} == 0) {
	$self->{params}->{'log'}->log_print("debug", "scan");

	my $parser = CQL::Parser->new();
	# uncomment to receive debug output on stderr
        #    $CQL::DEBUG = 1;

	my $root = $parser->parse($self->{params}->{'scanClause'});
	my $visitor = new MyVisitor;

	# pass the database engine to the visitor object to adapt the
	# regular expressions appropriately
	$visitor->{'db_engine'} = $self->{params}->{'db_engine'};

	# parse and translate the query
	$visitor->visit($root);

	$refdb_query = $visitor->{'refdb_query'};
    }

    $self->_run_refdb_scan($refdb_query);

}

######################################################################
## _run_refdb_info: retrieves configInfo from RefDB
######################################################################
#=head2 _run_refdb_info

#Title   : _run_refdb_info

#Usage   : $self->_run_refdb_info();

#Function: retrieves configInfo from RefDB

#=cut

sub _run_refdb_info() {
    my $self = shift;

    # query the database only if there were no query or parse errors
    # up to here
    if (@{$self->{diagnostics}} == 0) {
	my $client = new RefDB::Client;

	$client->set_conninfo($self->{params}->{'server_ip'}, $self->{params}->{'port'}, $self->{params}->{'username'}, $self->{params}->{'password'}, $self->{params}->{'database'}, $self->{params}->{'pdfroot'}, $self->{params}->{'css_url'}, $self->{params}->{'timeout'});


	# save error status and message in case we need it for diagnostics
	my $summary = $client->refdb_whichdb();
	$self->{refdb_error_status} = $client->get_status();
	$self->{refdb_error_message} = $client->get_status_msg();

	if ($self->_process_refdb_summary("scan", $summary)) {
	    $self->{refdb_result} = $client->get_data();
	}
    }

    $self->_wrap_explain_result();

}

######################################################################
## _run_refdb_query: sends the translated query string to refdbd
######################################################################
#=head2 _run_refdb_query

#Title   : _run_refdb_query

#Usage   : $self->_run_refdb_query($query_string);

#Function: sends the query string to refdbd

#Parameter: $query_string: the query in the RefDB query language

#=cut

sub _run_refdb_query() {
    my ($self, $query) = @_;

    # query the database only if there were no query or parse errors
    # up to here
    if (@{$self->{diagnostics}} == 0) {
	my $client = new RefDB::Client;

	$client->set_conninfo($self->{params}->{'server_ip'}, $self->{params}->{'port'}, $self->{params}->{'username'}, $self->{params}->{'password'}, $self->{params}->{'database'}, $self->{params}->{'pdfroot'}, $self->{params}->{'css_url'}, $self->{params}->{'timeout'});

	my $limit_string;
	if (defined($self->{params}->{'maximumRecords'})) {
	    $limit_string = $self->{params}->{'maximumRecords'};
	}
	if (defined($self->{params}->{'startRecord'})
	    && $self->{params}->{'startRecord'} > 1) {
	    # startRecord is the 1-based number of the first reference to
	    # retrieve. SQL OFFSET is the number of references to skip which
	    # is one less than startRecord
	    my $skip = $self->{params}->{'startRecord'} - 1;
	    if (defined($limit_string)) {
		$limit_string .= ":" . $skip;
	    }
	    else {
		$limit_string = "999999999:" . $skip;
	    }
	}
    
	# if we're to retrieve a subset, find out the total number of
	# references that match the query
	if (length($limit_string) > 0) {
	    my $summary = $client->refdb_countref("", "", $query);
	    $self->{params}->{'log'}->log_print("debug", $query);
	    
	    $self->{refdb_error_status} = $client->get_status();
	    $self->{refdb_error_message} = $client->get_status_msg();

	    # use scan mode as the countref command returns only a single count
	    $self->_process_refdb_summary("scan", $summary);
	    $self->{total_num_references} = $self->{num_references};
	}

	# save error status and message in case we need it for diagnostics
	my $summary = $client->refdb_getref($self->{params}->{'recordSchema'}, "", undef, "", "UTF-8", $limit_string, $self->{params}->{'recordSchema'}, $query);
	$self->{params}->{'log'}->log_print("debug", $query);

	$self->{refdb_error_status} = $client->get_status();
	$self->{refdb_error_message} = $client->get_status_msg();

	if ($self->_process_refdb_summary("searchRetrieve", $summary)) {
	    $self->{refdb_result} = $client->get_data();

	    if ($self->{total_num_references} == 0) {
		$self->{total_num_references} = $self->{num_references};
	    }
	}
    }

    $self->_wrap_searchRetrieve_result();

    return $self->{num_references};
}

######################################################################
## _run_refdb_scan: sends the translated scan string to refdbd
######################################################################
#=head2 _run_refdb_scan

#Title   : _run_refdb_scan

#Usage   : $self->_run_refdb_scan($scan_string);

#Function: sends the scan string to refdbd

#Parameter: $scan_string: the scan in the RefDB query language

#=cut

sub _run_refdb_scan() {
    my ($self, $scan) = @_;
    
    my $summary;

    my @scan_pairs;

    # query the database only if there were no query or parse errors
    # up to here
    if (@{$self->{diagnostics}} == 0) {
	my $client = new RefDB::Client;

	$client->set_conninfo($self->{params}->{'server_ip'}, $self->{params}->{'port'}, $self->{params}->{'username'}, $self->{params}->{'password'}, $self->{params}->{'database'}, $self->{params}->{'pdfroot'}, $self->{params}->{'css_url'}, $self->{params}->{'timeout'});


	# first retrieve a full list of index terms
	if ($scan =~ /:KW:/) {
	    $summary = $client->refdb_getkw("", "freq", "");
	    $self->{params}->{'log'}->log_print("debug", "getkw");
	}
	elsif ($scan =~ /:AX:/) {
	    $summary = $client->refdb_getax("", "freq", "");
	    $self->{params}->{'log'}->log_print("debug", "getax");
	}
	elsif ($scan =~ /:JA:/) {
	    $summary = $client->refdb_getjo("", "", "freq", "");
	    $self->{params}->{'log'}->log_print("debug", "getjo");
	}

	my $term = $scan;
	$term =~ s/^:..:[<=>~!][>=~]?(.*)/$1/;

	# save error status and message in case we need it for diagnostics
	$self->{refdb_error_status} = $client->get_status();
	$self->{refdb_error_message} = $client->get_status_msg();

	if ($self->_process_refdb_summary("scan", $summary)) {
	    $self->{refdb_result} = $client->get_data();
	}

	# now we have sorted "lists" formatted as freq:item
	# split them into an array and locate the requested item
	my @pairs = split(/\n/, $self->{refdb_result});
	my $counter = 0;
	foreach my $pair (@pairs) {
	    my $index = index($pair, ":");
	    if ($index > -1) {
		my $value = substr($pair, $index+1);
		my $freq = substr($pair, 0, $index);

		if ($value ge $term) {
		    last;
		}
	    }
	    $counter++;
	}

	# now $counter is an array index to $term. Compute the range
	# of items to return
	my $start_index;

	my $pos = $self->{params}->{'responsePosition'};
	if (!defined($pos)) {
	    $pos = 1;
	}
	my $max = $self->{params}->{'maximumRecords'};
	if (!defined($max)) {
	    # can't return more than we have
	    $max = $counter;
	}

	if ($pos > $max) {
	    $start_index = $counter+1-$max;
	}
	else {
	    $start_index = $counter+1-$pos;
	}

	$self->{num_references} = 0;

	# create a new array containing only the requested items
	foreach ($start_index..$start_index+$max) {
	    push (@scan_pairs, $pairs[$_]);
	    $self->{num_references}++;
	}
    }

    $self->_wrap_scan_result(\@scan_pairs);

    return $self->{num_references};
}

######################################################################
## _process_refdb_summary: processes the refdbd command summary
######################################################################
#=head2 _process_refdb_summary

#Title   : _process_refdb_summary

#Usage   : $self->_process_refdb_summary($command, $summary_string);

#Function: determines whether the command was successful, and reads
#          the number of successful and failed retrievals

#Parameter: $command either "searchRetrieve" or "scan"

#Parameter: $summary_string: the command summary returned by refdbd

#Returns: the number of successfully retrieved references

#=cut
sub _process_refdb_summary {
    my ($self, $command, $summary) = @_;

    my $diag;

    if ($command eq "searchRetrieve"
	&& $summary =~ /^999:(\d+).*:(\d+).*/) {
	$self->{num_references} = $1;
	$self->{num_errors} = $2;
    }
    elsif ($command eq "scan"
	   && $summary =~ /^999:(\d+)/) {
	$self->{num_references} = $1;
	$self->{num_errors} = 0;
    }
    else {
	$self->{num_references} = 0;
	$self->{num_errors} = 1;

	if ($self->{refdb_error_status} == 204
	    || $self->{refdb_error_status} == 208) {
	    $diag = Diagnostics->new(1, "database");
	    push (@{$self->{diagnostics}}, $diag);
	}
	elsif ($self->{refdb_error_status} == 234) {
	    $diag = Diagnostics->new(10, "");
	    push (@{$self->{diagnostics}}, $diag);
	}
	elsif ($self->{refdb_error_status} == 701) {
	    $diag = Diagnostics->new(1, "character encoding");
	    push (@{$self->{diagnostics}}, $diag);
	}
	elsif ($self->{refdb_error_status} == 801) {
	    $diag = Diagnostics->new(1, "memory");
	    push (@{$self->{diagnostics}}, $diag);
	}
    }
    
    $self->{num_references};
}

######################################################################
## _wrap_explain_result: wraps the data in a explain response
######################################################################
#=head2 _wrap_explain_result

#Title   : _wrap_explain_result

#Usage   : $self->_wrap_explain_result();

#Function: wraps the data in a explain response

#=cut
sub _wrap_explain_result {
    my $self = shift;

    # massage whichdb response
    my $response = $self->{refdb_result};

    my $numrefs = $response;
    $numrefs =~ s/.*Number of references: (\d+).*/$1/s;
    my $lastmodified = $response;
    $lastmodified =~ s/.*Last modified: ([\-0-9A-Z: ]+).*/$1/s;

    my $writer = new XML::Writer(OUTPUT => \$self->{response},
				 DATA_MODE => 1,
				 NAMESPACES => 1,
				 PREFIX_MAP => $self->{prefix_map},
				 FORCED_NS_DECLS => [${$self->{rpm}}{'zeerex'}],
				 DATA_INDENT => 2);

    # simplify access to the namespaces
    my $srw = ${$self->{rpm}}{'srw'};
    my $zeerex = ${$self->{rpm}}{'zeerex'};

    my $stylesheet = $self->{params}->{'stylesheet'};

    $writer->xmlDecl("UTF-8");

    if (defined($stylesheet) && length($stylesheet) > 0) {
	my $stylespec = "href=\"" . $stylesheet . "\" type=\"text/xml\"";
	$writer->pi('xml-stylesheet', $stylespec);
    }

    $writer->startTag([$zeerex, "explain"], 
		      "authoritative" => "true");

    ## serverInfo
    $writer->startTag([$zeerex, "serverInfo"],
		      "protocol" => "SRU",
		      "transport" => "http",
		      "version" => "1.1");

    $writer->dataElement([$zeerex, "host"], $self->{params}->{'zeerex_host'});
    $writer->dataElement([$zeerex, "port"], $self->{params}->{'zeerex_port'});
    $writer->dataElement([$zeerex, "database"], $self->{params}->{'zeerex_database'}, "numRecs" => $numrefs, "lastUpdate" => $lastmodified);

    # print only if user/pass configured
    if (length($self->{params}->{'username'}) > 0) {
	$writer->startTag([$zeerex, "authentication"]);
	$writer->dataElement([$zeerex, "user"], $self->{params}->{'username'});
	$writer->dataElement([$zeerex, "password"], $self->{params}->{'password'});
	$writer->endTag([$zeerex, "authentication"]);
    }
    $writer->endTag([$zeerex, "serverInfo"]);

    ## databaseInfo
    $writer->startTag([$zeerex, "databaseInfo"]);
    $writer->dataElement([$zeerex, "title"], $self->{params}->{'zeerex_databaseInfo_title'}, "lang" => "en", "primary" => "true");
    $writer->dataElement([$zeerex, "description"], $self->{params}->{'zeerex_databaseInfo_description'}, "lang" => "en", "primary" => "true");

    $writer->dataElement([$zeerex, "author"], $self->{params}->{'zeerex_databaseInfo_author'});
    $writer->dataElement([$zeerex, "contact"], $self->{params}->{'zeerex_databaseInfo_contact'});
    $writer->dataElement([$zeerex, "langUsage"], "Records are in English",
			 "codes" => "en");
    $writer->endTag([$zeerex, "databaseInfo"]);

    ## metaInfo
    $writer->startTag([$zeerex, "metaInfo"]);

    # dateModified is set to the current time (UTC) as the numRec and
    # lastUpdate fields are set at runtime
    my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = gmtime();
    my $now = sprintf "%4d-%02d-%02dT%02d:%02d:%02dZ", 1900+$year, $mon+1, $mday, $hour, $min, $sec;
    $writer->dataElement([$zeerex, "dateModified"], "$now");
    $writer->endTag([$zeerex, "metaInfo"]);

    ## indexInfo
    ## the bib index uses a subset of the context set proposed by loc, see
    ## http://www.loc.gov/standards/sru/cql-bibliographic-searching.html

    $writer->startTag([$zeerex, "indexInfo"]);
    $writer->emptyTag([$zeerex, "set"], "name" => "bib", "identifier" => "http://www.loc.gov/standards/sru/cql-bibliographic-searching.html");
    $writer->emptyTag([$zeerex, "set"], "name" => "dc", "identifier" => "http://www.loc.gov/zing/cql/dc-indexes/v1.0/");

    ## search by title
    ## bib.titleSub
    ## bib.titleTranslated
    ## bib.titleAlternative

    ## bib.titleUniform, dc.title
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false");
    $writer->dataElement([$zeerex, "title"], "Title", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "title", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "title", "set" => "dc");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## bib.titleSeries
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false");
    $writer->dataElement([$zeerex, "title"], "Series Title", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "seriesTitle", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## bib.titleAbbreviated (abuse to find periodicals?)
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "true");
    $writer->dataElement([$zeerex, "title"], "Title Abbreviated", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "titleAbbrev", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## search by name
    ## bib.namePersonal
    ## bib.namePersonalFamily
    ## bib.namePersonalGiven
    ## bib.nameCorporate
    ## bib.nameConference

    ## bib.name, dc.creator, dc.contributor
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "true");
    $writer->dataElement([$zeerex, "title"], "Name", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "name", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "creator", "set" => "dc");
    $writer->endTag([$zeerex, "map"]);
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "contributor", "set" => "dc");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## bib.namePersonal (map to name)
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "true");
    $writer->dataElement([$zeerex, "title"], "Personal Name", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "namePersonal", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## bib.nameCorporate (map to name)
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "true");
    $writer->dataElement([$zeerex, "title"], "Corporate Name", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "nameCorporate", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## search by subject
    ## bib.subjectPlace
    ## bib.subjectTitle
    ## bib.subjectName
    ## bib.subjectOccupation

    ## bib.subject, dc.subject, dc.coverage
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "true");
    $writer->dataElement([$zeerex, "title"], "Keyword", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "subject", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "subject", "set" => "dc");
    $writer->endTag([$zeerex, "map"]);
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "coverage", "set" => "dc");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);


    ## searching by identifier
    ## dc.identifier
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false");
    $writer->dataElement([$zeerex, "title"], "Identifier", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "identifier", "set" => "dc");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## searching by date
    ## bib.dateIssued, dc.date
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false");
    $writer->dataElement([$zeerex, "title"], "Publication Date", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "dateIssued", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "date", "set" => "dc");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## bib.dateCreated
    ## bib.dateValid
    ## bib.dateModified
    ## bib.dateCopyright

    ## searching by edition
    ## bib.edition

    ## searching by part
    ## bib.volume
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false");
    $writer->dataElement([$zeerex, "title"], "Volume", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "volume", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## bib.issue
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false");
    $writer->dataElement([$zeerex, "title"], "Issue", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "issue", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## bib.startPage
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false");
    $writer->dataElement([$zeerex, "title"], "Start Page", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "startPage", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## bib.endPage
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false");
    $writer->dataElement([$zeerex, "title"], "End Page", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "endPage", "set" => "bib");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);

    ## dc.publisher
    $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false");
    $writer->dataElement([$zeerex, "title"], "Publisher", "lang" => "en");
    $writer->startTag([$zeerex, "map"]);
    $writer->dataElement([$zeerex, "name"], "publisher", "set" => "dc");
    $writer->endTag([$zeerex, "map"]);
    $writer->endTag([$zeerex, "index"]);


    ## searching by issuance
    ## bib.issuance (translate to TY?)

    $writer->endTag([$zeerex, "indexInfo"]);

    ## schemaInfo
    $writer->startTag([$zeerex, "schemaInfo"]);
    $writer->startTag([$zeerex, "schema"], "identifier" => "info:srw/schema/1/mods-v3.0", "name" => "mods", "location" => "http://www.loc.gov/standards/mods/v3/mods-3-2.xsd", "sort" => "false", "retrieve" => "true");
    $writer->dataElement([$zeerex, "title"], "mods", "lang" => "en");
    $writer->endTag([$zeerex, "schema"]);
    $writer->startTag([$zeerex, "schema"], "identifier" => "-//Markus Hoenicka//DTD Ris V1.1.0//EN", "name" => "risx", "location" => "http://refdb.sourceforge.net/dtd/risx/risx.dtd", "sort" => "false", "retrieve" => "true");
    $writer->dataElement([$zeerex, "title"], "risx", "lang" => "en");
    $writer->endTag([$zeerex, "schema"]);
    $writer->endTag([$zeerex, "schemaInfo"]);

    ## configInfo
    $writer->startTag([$zeerex, "configInfo"]);
    $writer->dataElement([$zeerex, "default"], "bib", "type" => "contextSet");
    $writer->dataElement([$zeerex, "default"], "mods", "type" => "retrieveSchema");
    $writer->dataElement([$zeerex, "setting"], "xml", "type" => "recordPacking");
    $writer->dataElement([$zeerex, "supports"], "all", "type" => "relation");
    $writer->dataElement([$zeerex, "supports"], "any", "type" => "relation");
    $writer->dataElement([$zeerex, "supports"], "exact", "type" => "relation");
    $writer->dataElement([$zeerex, "supports"], "within", "type" => "relation");
    $writer->dataElement([$zeerex, "supports"], "*", "type" => "maskingCharacter");
    $writer->dataElement([$zeerex, "supports"], "?", "type" => "maskingCharacter");
    if ($self->{params}->{'db_engine'} eq "mysql"
	|| $self->{params}->{'db_engine'} eq "pgsql") {
	$writer->dataElement([$zeerex, "supports"], "^", "type" => "anchoring");
    }
    $writer->emptyTag([$zeerex, "supports"], "type" => "scan");
    $writer->endTag([$zeerex, "configInfo"]);

    $writer->endTag([$zeerex, "explain"]);

    $writer->end();
}

######################################################################
## _wrap_searchRetrieve_result: wraps the refdbd result in a
##                              searchRetrieveResult
######################################################################
#=head2 _wrap_searchRetrieve_result

#Title   : _wrap_searchRetrieve_result

#Usage   : $self->_wrap_searchRetrieve_result();

#Function: wraps the refdbd result in a searchRetrieveResult and puts the
#          resulting string in $self->{response}

#=cut
sub _wrap_searchRetrieve_result {
    my $self = shift;

    # the counter will be incremented at the beginning of a loop. We set
    # it to zero or to one less than the requested start record
    my $counter = 0;

    if (defined($self->{params}->{'startRecord'})) {
	$counter = $self->{params}->{'startRecord'} - 1;
    }

    # massage the refdbd result. risx datasets are shipped in a <ris> wrapper
    # which we simply discard. The remainder is split at the <entry> start
    # and end tags
    # mods datasets arrive in a <modsCollection> wrapper, and the entries
    # are called <mods>

    my $response = $self->{refdb_result};
    my @records;

    if ($self->{params}->{'recordSchema'} eq "mods") {
	$response =~ s/.*<mods:modsCollection[^<]*(<mods:mods.*)<\/mods:modsCollection>/$1/s;
	@records = split /\s*\<\/mods:mods\>\s*/, $response;
    }
    else {
	$response =~ s/.*<risx:ris[^<]*(<risx:entry.*)<\/risx:ris>/$1/s;
	@records = split /\s*\<\/risx:entry\>\s*/, $response;
    }

    my $writer;

    if ($self->{params}->{'recordSchema'} eq "mods") {
	$writer = new XML::Writer(OUTPUT => \$self->{response},
				  DATA_MODE => 1,
				  NAMESPACES => 1,
				  PREFIX_MAP => $self->{prefix_map},
				  FORCED_NS_DECLS => [${$self->{rpm}}{'srw'},
						      ${$self->{rpm}}{'diag'},
						      ${$self->{rpm}}{'xcql'},
						      ${$self->{rpm}}{'mods'}],
				  DATA_INDENT => 2);
    }
    else {
	$writer = new XML::Writer(OUTPUT => \$self->{response},
				  DATA_MODE => 1,
				  NAMESPACES => 1,
				  PREFIX_MAP => $self->{prefix_map},
				  FORCED_NS_DECLS => [${$self->{rpm}}{'srw'},
						      ${$self->{rpm}}{'diag'},
						      ${$self->{rpm}}{'xcql'},
						      ${$self->{rpm}}{'risx'}],
				  DATA_INDENT => 2);
    }

    # simplify access to the namespaces
    my $srw = ${$self->{rpm}}{'srw'};
    my $diag = ${$self->{rpm}}{'diag'};
    my $xcql = ${$self->{rpm}}{'xcql'};
    my $risx = ${$self->{rpm}}{'risx'};
    my $mods = ${$self->{rpm}}{'mods'};

    my $stylesheet = $self->{params}->{'stylesheet'};

    $writer->xmlDecl("UTF-8");

    if (defined($stylesheet) && length($stylesheet) > 0) {
	my $stylespec = "href=\"" . $stylesheet . "\" type=\"text/xml\"";
	$writer->pi('xml-stylesheet', $stylespec);
    }

#    if (defined($self->{params}->{'stylesheet'})
#	&& length($self->{params}->{'stylesheet'}) > 0) {
	# is it CSS or XSL?
#	if ($self->{params}->{'stylesheet'} =~ /.xsl$/) {
#	    $self->{response} .= "<?xml-stylesheet type=\"text/xsl\" href=\"" . $self->{params}->{'stylesheet'} . "\"?>";
#	}
#	else {
#	    $self->{response} .= "<?xml-stylesheet type=\"text/css\" href=\"" . $self->{params}->{'stylesheet'} . "\"?>";
#	}
#    }
    
    $writer->startTag([$srw, "searchRetrieveResponse"]);
    $writer->dataElement([$srw, "version"], "1.1");
    $writer->dataElement([$srw, "numberOfRecords"], $self->{total_num_references});

    if ($self->{num_references} > 0) {
	$writer->startTag([$srw, "records"]);

	# loop over all retrieved records
	foreach my $record (@records) {
	    $counter++;
	    $writer->startTag([$srw, "record"]);
	    $writer->dataElement([$srw, "recordPacking"], "XML");
	    if ($self->{params}->{'recordSchema'} eq "mods") {
		$writer->dataElement([$srw, "recordSchema"], $mods);
	    }
	    else {
		$writer->dataElement([$srw, "recordSchema"], $risx);
	    }
	    
	    $writer->startTag([$srw, "recordData"]);
	
	    # strip whitespace at the beginning (the end should not have any
	    # that needs attention), and add back the missing end tag
	    $record =~ s/^\s(.*)/$1/;
	    $record = "  " . $record;

	    if ($self->{params}->{'recordSchema'} eq "mods") {
		$record .= "\n  </mods:mods>\n";
	    }
	    else {
		$record .= "\n  </risx:entry>\n";
	    }

	    $self->{response} .= "\n" . $record;

	    $writer->endTag([$srw, "recordData"]);
	    $writer->dataElement([$srw, "recordPosition"], $counter);
	    $writer->endTag([$srw, "record"]);
	}

	$writer->endTag([$srw, "records"]);
    }

    # nextRecordPosition should be provided only if there are more
    # datasets available
    if ($self->{total_num_references} > $counter) {
	$writer->dataElement([$srw, "nextRecordPosition"], $counter+1);
    }

    # echo the request
    $writer->startTag([$srw, "echoedSearchRetrieveRequest"]);
    $writer->dataElement([$srw, "version"], "1.1");
    $writer->dataElement([$srw, "query"], $self->{params}->{'query'});
    $writer->dataElement([$srw, "recordSchema"], $self->{params}->{'recordSchema'});
    $writer->dataElement([$srw, "recordPacking"], $self->{params}->{'recordPacking'});
    $writer->dataElement([$srw, "stylesheet"], $self->{params}->{'stylesheet'});
    if (defined($self->{params}->{'startRecord'})) {
	$writer->dataElement([$srw, "startRecord"], $self->{params}->{'startRecord'});
    }

    if (defined($self->{params}->{'maximumRecords'})) {
	$writer->dataElement([$srw, "maximumRecords"], $self->{params}->{'maximumRecords'});
    }

    $writer->endTag([$srw, "echoedSearchRetrieveRequest"]);

    # extra response data. We provide the URI to call the script again in order
    # to request further datasets
    $writer->startTag([$srw, "extraResponseData"]);
    my $uri = "http://" . $self->{params}->{'zeerex_host'} . ":" . $self->{params}->{'zeerex_port'} . "/" . $self->{params}->{'zeerex_database'};
    $writer->dataElement("databaseURI", $uri);

    $writer->endTag([$srw, "extraResponseData"]);

    # dump any diagnostics
    if (@{$self->{diagnostics}} > 0) {
	$writer->startTag([$srw, "diagnostics"]);
	
	foreach my $diagnostic (@{$self->{diagnostics}}) {
	    $writer->startTag([$diag, "diagnostic"]);
	    $writer->dataElement([$diag, "uri"], $diagnostic->diag_uri());
	    $writer->dataElement([$diag, "detail"], $diagnostic->diag_detail());
	    $writer->dataElement([$diag, "message"], $diagnostic->diag_message());
	    $writer->endTag([$diag, "diagnostic"]);
	}
	$writer->endTag([$srw, "diagnostics"]);
    }

    $writer->endTag([$srw, "searchRetrieveResponse"]);
}

######################################################################
## _wrap_scan_result: wraps the refdbd result in a
##                              scanResult
######################################################################
#=head2 _wrap_scan_result

#Title   : _wrap_scan_result

#Usage   : $self->_wrap_scan_result();

#Function: wraps the refdbd result in a scanResult and puts the
#          resulting string in $self->{response}
#
#Param: $itemref reference to an array containing freq:item pairs

#=cut
sub _wrap_scan_result {
    my ($self, $itemref) = @_;

    # massage the refdbd result. risx datasets are shipped in a <ris> wrapper
    # which we simply discard. The remainder is split at the <entry> start
    # and end tags

    my $writer = new XML::Writer(OUTPUT => \$self->{response},
				 DATA_MODE => 1,
				 NAMESPACES => 1,
				 PREFIX_MAP => $self->{prefix_map},
				 FORCED_NS_DECLS => [${$self->{rpm}}{'srw'},
						     ${$self->{rpm}}{'diag'},
						     ${$self->{rpm}}{'xcql'},
						     ${$self->{rpm}}{'risx'}],
				 DATA_INDENT => 2);

    # simplify access to the namespaces
    my $srw = ${$self->{rpm}}{'srw'};
    my $diag = ${$self->{rpm}}{'diag'};
    my $xcql = ${$self->{rpm}}{'xcql'};
    my $risx = ${$self->{rpm}}{'risx'};

    my $stylesheet = $self->{params}->{'stylesheet'};

    $writer->xmlDecl("UTF-8");

    if (defined($stylesheet) && length($stylesheet) > 0) {
	my $stylespec = "href=\"" . $stylesheet . "\" type=\"text/xml\"";
	$writer->pi('xml-stylesheet', $stylespec);
    }

    $writer->startTag([$srw, "scanResponse"]);
    $writer->dataElement([$srw, "version"], "1.1");

    if ($self->{num_references} > 0) {
	$writer->startTag([$srw, "terms"]);

	# loop over all retrieved records
	foreach my $pair (@{$itemref}) {
	    my $index = index($pair, ":");
	    if ($index > -1) {
		my $value = substr($pair, $index+1);
		my $freq = substr($pair, 0, $index);
		# the frequency string has leading whitespace for padding
		$freq =~ s/^\s+//;

		$writer->startTag([$srw, "term"]);
		$writer->dataElement([$srw, "value"], $value);
		$writer->dataElement([$srw, "numberOfRecords"], $freq);
	    
		$writer->endTag([$srw, "term"]);
	    }
	}

	$writer->endTag([$srw, "terms"]);
    }

    if (@{$self->{diagnostics}} > 0) {
	$writer->startTag([$srw, "diagnostics"]);
	
	foreach my $diagnostic (@{$self->{diagnostics}}) {
	    $writer->startTag([$diag, "diagnostic"]);
	    $writer->dataElement([$diag, "uri"], $diagnostic->diag_uri());
	    $writer->dataElement([$diag, "detail"], $diagnostic->diag_detail());
	    $writer->dataElement([$diag, "message"], $diagnostic->diag_message());
	    $writer->endTag([$diag, "diagnostic"]);
	}
	$writer->endTag([$srw, "diagnostics"]);
    }

    $writer->endTag([$srw, "scanResponse"]);
}

1;
__END__

