"Fossies" - the Fresh Open Source Software Archive 
Member "koha-19.11.15/Koha/SearchEngine/Elasticsearch/Search.pm" (23 Feb 2021, 18465 Bytes) of package /linux/misc/koha-19.11.15.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "Search.pm" see the
Fossies "Dox" file reference documentation.
1 package Koha::SearchEngine::Elasticsearch::Search;
2
3 # Copyright 2014 Catalyst IT
4 #
5 # This file is part of Koha.
6 #
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 3 of the License, or (at your option) any later
10 # version.
11 #
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License along
17 # with Koha; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19
20 =head1 NAME
21
22 Koha::SearchEngine::Elasticsearch::Search - search functions for Elasticsearch
23
24 =head1 SYNOPSIS
25
26 my $searcher =
27 Koha::SearchEngine::Elasticsearch::Search->new( { index => $index } );
28 my $builder = Koha::SearchEngine::Elasticsearch::QueryBuilder->new(
29 { index => $index } );
30 my $query = $builder->build_query('perl');
31 my $results = $searcher->search($query);
32 print "There were " . $results->total . " results.\n";
33 $results->each(sub {
34 push @hits, $_[0];
35 });
36
37 =head1 METHODS
38
39 =cut
40
41 use Modern::Perl;
42
43 use base qw(Koha::SearchEngine::Elasticsearch);
44 use C4::Context;
45 use C4::AuthoritiesMarc;
46 use Koha::ItemTypes;
47 use Koha::AuthorisedValues;
48 use Koha::SearchEngine::QueryBuilder;
49 use Koha::SearchEngine::Search;
50 use Koha::Exceptions::Elasticsearch;
51 use MARC::Record;
52 use Catmandu::Store::ElasticSearch;
53 use MARC::File::XML;
54 use Data::Dumper; #TODO remove
55 use Carp qw(cluck);
56 use MIME::Base64;
57
# Generate the "store" accessor, which holds the lazily created
# Catmandu::Store::ElasticSearch instance used by count() and
# max_result_window().
__PACKAGE__->mk_accessors( qw( store ) );
59
60 =head2 search
61
62 my $results = $searcher->search($query, $page, $count, %options);
63
64 Run a search using the query. It'll return C<$count> results, starting at page
65 C<$page> (C<$page> counts from 1; anything less than that, or C<undef>, becomes 1.)
66 C<$count> is also the number of entries on a page.
67
68 C<%options> is a hash containing extra options:
69
70 =over 4
71
72 =item offset
73
74 If provided, this overrides the C<$page> value, and specifies the record as
75 an offset (i.e. the number of the record to start with), rather than a page.
76
77 =back
78
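79 Returns the raw Elasticsearch response as a hashref; the matching records are under its C<hits> key. Dies with the processed error if the request fails.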
80
81 =cut
82
# Run $query against the configured index and return the raw Elasticsearch
# response.
#
#   $query   - hashref holding the request body; its "size" and "from" keys
#              are overwritten here.
#   $page    - 1-based page number; undef or anything <= 0 means page 1.
#   $count   - results per page (20 when undef).
#   %options - "offset", when present, is used as the starting record and
#              $page is ignored.
#
# Dies with whatever process_error() makes of the failure when the request
# throws.
sub search {
    my ( $self, $query, $page, $count, %options ) = @_;

    my $params = $self->get_elasticsearch_params();

    # Page size, falling back to the default of 20 results per page.
    $query->{size} = defined $count ? $count : 20;

    # Elasticsearch is told which record to start from, not which page.
    if ( exists $options{offset} ) {
        $query->{from} = $options{offset};
    }
    else {
        my $zero_based_page = ( defined $page && $page > 0 ) ? $page - 1 : 0;
        $query->{from} = $zero_based_page * $query->{size};
    }

    my $elasticsearch = $self->get_elasticsearch();
    my $results       = eval {
        $elasticsearch->search(
            index => $params->{index_name},
            body  => $query,
        );
    };
    die $self->process_error($@) if $@;

    return $results;
}
108
109 =head2 count
110
111 my $count = $searcher->count($query);
112
113 This mimics a search request, but just gets the result count instead. That's
114 faster than pulling all the data in, usually.
115
116 =cut
117
# Return the number of records matching $query, or 0 when the store reports
# no total. The Catmandu store is created on first use and kept in the
# "store" accessor for later calls.
sub count {
    my ( $self, $query ) = @_;

    my $params = $self->get_elasticsearch_params();
    if ( !$self->store ) {
        $self->store(
            Catmandu::Store::ElasticSearch->new( %{$params}, trace_calls => 0 )
        );
    }

    # $query is flattened into the key/value arguments of the bag search.
    my $hits = $self->store->bag->search( %{$query} );
    return $hits->total() || 0;
}
130
131 =head2 search_compat
132
133 my ( $error, $results, $facets ) = $search->search_compat(
134 $query, $simple_query, \@sort_by, \@servers,
135 $results_per_page, $offset, undef, $item_types,
136 $query_type, $scan
137 )
138
139 A search interface somewhat compatible with L<C4::Search/getRecords>. Anything
140 that is returned in the query created by build_query_compat will probably
141 get ignored here, along with some other things (like C<@servers>.)
142
143 =cut
144
# getRecords-style search. Returns ( $error, \%result, $facets ) where
# $error is always undef, $result{biblioserver} carries "hits" and "RECORDS",
# and $facets is whatever _convert_facets() produces from the aggregations.
# A true $scan hands the whole request over to _aggregation_scan().
# $simple_query, $sort_by, $servers, $branches, $item_types and $query_type
# are accepted for compatibility and not used here.
sub search_compat {
    my (
        $self,       $query,            $simple_query, $sort_by,
        $servers,    $results_per_page, $offset,       $branches,
        $item_types, $query_type,       $scan
    ) = @_;

    return $self->_aggregation_scan( $query, $results_per_page, $offset )
        if $scan;

    if ( !defined $offset || $offset < 0 ) {
        $offset = 0;
    }
    my $results =
        $self->search( $query, undef, $results_per_page, offset => $offset );

    # Each hit becomes a MARC::Record. opac-search expects every record at
    # the array position matching its place in the full result set, so the
    # slots below $offset are left empty.
    my $hits = $results->{'hits'};
    my @records;
    my $position = $offset;
    foreach my $hit ( @{ $hits->{'hits'} } ) {
        $records[$position] =
            $self->decode_record_from_result( $hit->{'_source'} );
        $position++;
    }

    # Consumers expect a name-spaced result; "biblioserver" is the default
    # configuration.
    my %result = (
        biblioserver => {
            hits    => $hits->{'total'},
            RECORDS => \@records,
        },
    );
    return ( undef, \%result,
        $self->_convert_facets( $results->{aggregations} ) );
}
180
181 =head2 search_auth_compat
182
183 my ( $results, $total ) =
184 $searcher->search_auth_compat( $query, $offset, $count, $skipmetadata, %options );
185
186 This has a similar calling convention to L</search>, however it returns its
187 results in the same form as L<C4::AuthoritiesMarc/SearchAuthorities>.
188
189 =cut
190
# Authority search returning ( \@records, $total ) in the shape used by
# C4::AuthoritiesMarc::SearchAuthorities.
#
#   $offset       - 1-based start record; undef or anything <= 0 means 1.
#   $count        - number of records to fetch (passed on to search()).
#   $skipmetadata - when true each record holds only "authid"; otherwise
#                   authtype, reported_tag, summary and used are added.
#   %options      - passed on to search(); "offset" is overwritten here.
sub search_auth_compat {
    my ( $self, $query, $offset, $count, $skipmetadata, %options ) = @_;

    $offset = 1 unless defined $offset && $offset > 0;

    # Authority search uses a 1-based offset, search() a 0-based one.
    $options{offset} = $offset - 1;

    my $schema = Koha::Database->new()->schema();
    my $res    = $self->search( $query, undef, $count, %options );

    my $bib_searcher =
        Koha::SearchEngine::Elasticsearch::Search->new( { index => 'biblios' } );
    my $hits = $res->{'hits'};
    my @records;
    foreach my $es_record ( @{ $hits->{'hits'} } ) {
        my $source = $es_record->{'_source'};

        # Links are built from the authid, so use the id the record was
        # indexed under rather than the 001, which can in some
        # circumstances contain other data.
        my $authid = $es_record->{_id};
        my %result = ( authid => $authid );

        unless ($skipmetadata) {

            # TODO put all this info into the record at index time so we
            # don't have to go and sort it all out now.
            my $authtypecode = $source->{authtype};

            # FIXME there's an assumption here that we will get a result.
            # the original code also makes an assumption that some provided
            # authtypecode may sometimes be used instead of the one stored
            # with the record. It's not documented why this is the case, so
            # it's not reproduced here yet.
            my $authtype = $schema->resultset('AuthType')
                ->search( { authtypecode => $authtypecode } )->single;
            my $report_tag = $authtype ? $authtype->auth_tag_to_report : "";

            my $marc      = $self->decode_record_from_result($source);
            my $mainentry = $marc->field($report_tag);

            # Concatenate every subfield of the main entry as "$<code><value>";
            # stays undef when the record has no such field.
            my $reported_tag;
            if ($mainentry) {
                for my $subfield ( $mainentry->subfields() ) {
                    $reported_tag .= '$' . $subfield->[0] . $subfield->[1];
                }
            }

            $result{authtype} =
                $authtype ? $authtype->authtypetext : $authtypecode;
            $result{reported_tag} = $reported_tag;

            # Reimplementing BuildSummary is out of scope because it'll be hard
            $result{summary} =
                C4::AuthoritiesMarc::BuildSummary( $marc, $authid,
                $authtypecode );
            $result{used} = $self->count_auth_use( $bib_searcher, $authid );
        }
        push @records, \%result;
    }
    return ( \@records, $hits->{'total'} );
}
253
254 =head2 count_auth_use
255
256 my $count = $auth_searcher->count_auth_use($bib_searcher, $authid);
257
258 This runs a search to determine the number of records that reference the
259 specified authid. C<$bib_searcher> must be something compatible with
260 elasticsearch, as the query is built in this function.
261
262 =cut
263
# Count the records that reference authority $authid by running a
# filter-only term query on "koha-auth-number" through $bib_searcher->count.
sub count_auth_use {
    my ( $self, $bib_searcher, $authid ) = @_;

    my %by_authority = ( term => { 'koha-auth-number' => $authid } );
    my $query        = {
        query => {
            bool => {
                # query => { match_all => {} },
                filter => \%by_authority,
            },
        },
    };
    return $bib_searcher->count($query);
}
277
278 =head2 simple_search_compat
279
280 my ( $error, $marcresults, $total_hits ) =
281 $searcher->simple_search_compat( $query, $offset, $max_results, %options );
282
283 This is a simpler interface to the searching, intended to be similar enough to
284 L<C4::Search/SimpleSearch>.
285
286 Arguments:
287
288 =over 4
289
290 =item C<$query>
291
292 A thing to search for. It could be a simple string, or something constructed
293 with the appropriate QueryBuilder module.
294
295 =item C<$offset>
296
297 How many results to skip from the start of the results.
298
299 =item C<$max_results>
300
301 The max number of results to return. The default is 100 (because unlimited
302 is a pretty terrible thing to do.)
303
304 =item C<%options>
305
306 These options are unused by Elasticsearch
307
308 =back
309
310 Returns:
311
312 =over 4
313
314 =item C<$error>
315
316 if something went wrong, this'll contain some kind of error
317 message.
318
319 =item C<$marcresults>
320
321 an arrayref of MARC::Records (note that this is different from the
322 L<C4::Search> version which will return plain XML, but too bad.)
323
324 =item C<$total_hits>
325
326 the total number of results that this search could have returned.
327
328 =back
329
330 =cut
331
# SimpleSearch-style interface. Returns ( $error, \@marc_records, $total ).
#
#   $query       - a plain string (run through the query builder first) or
#                  an already built query structure.
#   $offset      - number of results to skip; undef or negative means 0.
#   $max_results - maximum number of records to return (100 when undef).
#
# An empty/false $query yields ( 'No query entered', undef, undef ).
sub simple_search_compat {
    my ( $self, $query, $offset, $max_results ) = @_;

    if ( !$query ) {
        return ( 'No query entered', undef, undef );
    }

    if ( !defined $offset || $offset < 0 ) {
        $offset = 0;
    }
    my $limit = defined $max_results ? $max_results : 100;

    if ( !ref $query ) {

        # We'll push it through the query builder to sanitise everything.
        my $builder =
            Koha::SearchEngine::QueryBuilder->new( { index => $self->index } );
        ( undef, $query ) = $builder->build_query_compat( undef, [$query] );
    }

    my $results = $self->search( $query, undef, $limit, offset => $offset );
    my $hits    = $results->{'hits'};

    my @records;
    for my $hit ( @{ $hits->{'hits'} } ) {
        push @records, $self->decode_record_from_result( $hit->{'_source'} );
    }
    return ( undef, \@records, $hits->{'total'} );
}
355
356 =head2 extract_biblionumber
357
358 my $biblionumber = $searcher->extract_biblionumber( $searchresult );
359
360 $searchresult comes from simple_search_compat.
361
362 Returns the biblionumber from the search result record.
363
364 =cut
365
# Return the biblionumber of a record produced by simple_search_compat by
# delegating to the engine-independent helper in Koha::SearchEngine::Search.
sub extract_biblionumber {
    my $self   = shift;
    my $record = shift;

    return Koha::SearchEngine::Search::extract_biblionumber($record);
}
370
371 =head2 decode_record_from_result

372 my $marc_record = $self->decode_record_from_result($result);
373
374 Extracts marc data from Elasticsearch result and decodes to MARC::Record object
375
376 =cut
377
# Turn the "_source" hashref of an Elasticsearch hit into a MARC::Record.
#
# $result->{marc_format} selects the decoder:
#   base64ISO2709 - marc_data is base64-encoded ISO 2709
#   MARCXML       - marc_data is MARCXML, read as UTF-8 in the flavour named
#                   by the marcflavour preference
#   ARRAY         - marc_data_array is handed to _array_to_marc()
#
# Throws Koha::Exceptions::Elasticsearch when marc_format is missing or is
# none of the above.
sub decode_record_from_result {
    my ( $self, $result ) = @_;

    # Default to '' so that a result without marc_format reaches the
    # exception below instead of emitting an "uninitialized value" warning
    # for each comparison on the way there.
    my $format = $result->{marc_format} // '';

    if ( $format eq 'base64ISO2709' ) {
        return MARC::Record->new_from_usmarc(
            decode_base64( $result->{marc_data} ) );
    }
    if ( $format eq 'MARCXML' ) {
        return MARC::Record->new_from_xml( $result->{marc_data}, 'UTF-8',
            uc C4::Context->preference('marcflavour') );
    }
    if ( $format eq 'ARRAY' ) {
        return $self->_array_to_marc( $result->{marc_data_array} );
    }

    Koha::Exceptions::Elasticsearch->throw("Missing marc_format field in Elasticsearch result");
}
395
396 =head2 max_result_window
397
398 Returns the maximum number of results that can be fetched
399
400 This directly requests Elasticsearch for the setting index.max_result_window (or
401 the default value for this setting in case it is not set)
402
403 =cut
404
# Return index.max_result_window for this searcher's index, as reported by
# Elasticsearch: the explicitly configured value if there is one, otherwise
# the value listed among the index defaults.
sub max_result_window {
    my ($self) = @_;

    # Create the Catmandu store on first use.
    if ( !$self->store ) {
        my $params = $self->get_elasticsearch_params;
        $self->store( Catmandu::Store::ElasticSearch->new( %{$params} ) );
    }

    my $store      = $self->store;
    my $index_name = $store->index_name;

    # include_defaults makes unset settings show up under "defaults";
    # flat_settings returns dotted keys such as 'index.max_result_window'.
    my $settings = $store->es->indices->get_settings(
        index  => $index_name,
        params => { include_defaults => 'true', flat_settings => 'true' },
    );

    my $for_index = $settings->{$index_name};
    return $for_index->{settings}->{'index.max_result_window'}
        // $for_index->{defaults}->{'index.max_result_window'};
}
423
424 =head2 _convert_facets
425
426 my $koha_facets = $self->_convert_facets($es_facets);
427
428 Converts elasticsearch facets types to the form that Koha expects.
429 It expects the ES facet name to match the Koha type, for example C<itype>,
430 C<au>, C<su-to>, etc.
431
432 =cut
433
# Convert the "aggregations" part of an Elasticsearch response ($es) into
# the arrayref of facet hashes the Koha templates consume, sorted by each
# field's facet_order. Aggregations whose name is not a facetable field with
# a defined facet_order are ignored, and each facet keeps at most
# FacetMaxCount entries. Returns nothing when $es is false.
sub _convert_facets {
    my ( $self, $es, $exp_facet ) = @_;

    return unless $es;

    # Template label per Elasticsearch field name (these are the ES names,
    # not the CCL ones Zebra uses).
    my %label = (
        author         => 'Authors',
        itype          => 'ItemTypes',
        location       => 'Location',
        'su-geo'       => 'Places',
        'title-series' => 'Series',
        subject        => 'Topics',
        ccode          => 'CollectionCodes',
        holdingbranch  => 'HoldingLibrary',
        homebranch     => 'HomeLibrary',
        ln             => 'Language',
    );

    # Display order and label for every field that is set up as a facet.
    my %type_to_label;
    my @facetable_fields =
        Koha::SearchEngine::Elasticsearch->get_facetable_fields;
    foreach my $field (@facetable_fields) {
        next if !defined $field->facet_order;
        my $name = $field->name;
        $type_to_label{$name} =
            { order => $field->facet_order, label => $label{$name} };
    }

    # Some facets show a description instead of the stored code: item
    # types, shelving locations (OPAC wording when on the OPAC) and both
    # kinds of library.
    my @itypes        = Koha::ItemTypes->search;
    my @libraries     = Koha::Libraries->search;
    my $library_names = { map { $_->branchcode => $_->branchname } @libraries };
    my @locations     = Koha::AuthorisedValues->search( { category => 'LOC' } );
    my $opac          = C4::Context->interface eq 'opac';
    my %special       = (
        itype    => { map { $_->itemtype => $_->description } @itypes },
        location => {
            map {
                $_->authorised_value =>
                    ( $opac ? ( $_->lib_opac || $_->lib ) : $_->lib )
            } @locations
        },
        holdingbranch => $library_names,
        homebranch    => $library_names,
    );

    my @facets;
    $exp_facet //= '';
    foreach my $type ( keys %{$es} ) {
        next if !exists( $type_to_label{$type} );
        my $data = $es->{$type};
        my $meta = $type_to_label{$type};

        my $facet = {
            type_id                     => $type . '_id',
            "type_label_$meta->{label}" => 1,
            type_link_value             => $type,
            order                       => $meta->{order},
        };

        # Keep only the first FacetMaxCount buckets, or all of them when
        # there are fewer than that.
        my $limit        = C4::Context->preference('FacetMaxCount');
        my $bucket_count = @{ $data->{buckets} };
        $limit = $bucket_count if $limit > $bucket_count;

        for my $i ( 0 .. $limit - 1 ) {
            my $bucket = $data->{buckets}[$i];
            my $value  = $bucket->{key};
            my $hits   = $bucket->{doc_count};
            my $shown =
                exists( $special{$type} )
                ? ( $special{$type}->{$value} // $value )
                : $value;

            push @{ $facet->{facets} }, {
                facet_count       => $hits,
                facet_link_value  => $value,
                facet_title_value => $value . " ($hits)",
                facet_label_value => $shown,    # TODO either truncate this,
                                                # or make the template do it
                type_link_value   => $type,
            };
        }

        # A type that ended up with no entries is not reported at all.
        push @facets, $facet if exists $facet->{facets};
    }

    my @ordered = sort { $a->{order} <=> $b->{order} } @facets;
    return \@ordered;
}
514
515 =head2 _aggregation_scan
516
517 my $result = $self->_aggregation_scan($query, 10, 0);
518
519 Perform an aggregation request for scan purposes.
520
521 =cut
522
# Perform an aggregation ("scan") request and present every bucket as a
# small MARC record.
#
#   $query            - query hashref; the first key of its "aggregations"
#                       is the field scanned, and its terms size is set to
#                       1000 here.
#   $results_per_page - maximum number of buckets to convert.
#   $offset           - index of the first bucket to convert; undef or
#                       negative means 0.
#
# Returns ( undef, \%result, undef ) where $result{biblioserver}{hits} is
# the number of buckets and $result{biblioserver}{RECORDS} holds the records
# in ISO 2709 form, each at the array position of its bucket. With no
# aggregation in $query, hits is 0 and RECORDS is undef.
sub _aggregation_scan {
    my ( $self, $query, $results_per_page, $offset ) = @_;

    my $aggregations = $query->{aggregations} || {};
    if ( !scalar keys %{$aggregations} ) {

        # Built from a list on purpose: assigning an anonymous hashref to
        # %result would leave a single stringified-reference key and no
        # "biblioserver" entry for the caller.
        my %result = (
            biblioserver => {
                hits    => 0,
                RECORDS => undef,
            },
        );
        return ( undef, \%result, undef );
    }

    # search_compat hands the offset over before normalising it.
    $offset = 0 if !defined $offset || $offset < 0;

    my ($field) = keys %{$aggregations};
    $query->{aggregations}{$field}{terms}{size} = 1000;

    # Only the aggregation buckets are of interest, so ask for zero hits.
    my $results = $self->search( $query, 1, 0 );
    my $buckets = $results->{aggregations}{$field}{buckets} || [];
    my $count   = scalar @{$buckets};

    # Scan values are expressed as:
    # - MARC21: 100a (count) and 245a (term)
    # - UNIMARC: 200f (count) and 200a (term)
    my ( $count_tag, $count_code, $term_tag ) =
        C4::Context->preference('marcflavour') eq 'UNIMARC'
        ? ( 200, 'f', 200 )
        : ( 100, 'a', 245 );

    # opac-search expects results to be put in the right place in the
    # array, according to $offset.
    my @records;
    my $last = $offset + $results_per_page - 1;
    $last = $count - 1 if $last > $count - 1;
    for my $index ( $offset .. $last ) {
        my $bucket = $buckets->[$index];
        my $marc   = MARC::Record->new;
        $marc->encoding('UTF-8');
        $marc->append_fields(
            MARC::Field->new(
                $count_tag, ' ', ' ', $count_code => $bucket->{doc_count}
            )
        );
        $marc->append_fields(
            MARC::Field->new( $term_tag, ' ', ' ', 'a' => $bucket->{key} )
        );
        $records[$index] = $marc->as_usmarc();
    }

    # Consumers of this expect a namespaced result; "biblioserver" is the
    # default configuration.
    my %result = (
        biblioserver => {
            hits    => $count,
            RECORDS => \@records,
        },
    );
    return ( undef, \%result, undef );
}
577
578 1;