1
2
3
4
5
6 package org.tailormap.api.solr;
7
8 import static org.tailormap.api.scheduling.IndexTask.INDEX_KEY;
9
10 import jakarta.validation.constraints.NotNull;
11 import jakarta.validation.constraints.Positive;
12 import java.io.IOException;
13 import java.lang.invoke.MethodHandles;
14 import java.math.BigDecimal;
15 import java.time.Duration;
16 import java.time.Instant;
17 import java.time.OffsetDateTime;
18 import java.time.ZoneId;
19 import java.util.ArrayList;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.List;
23 import java.util.Map;
24 import java.util.Set;
25 import java.util.UUID;
26 import java.util.function.Consumer;
27 import org.apache.solr.client.solrj.SolrClient;
28 import org.apache.solr.client.solrj.SolrQuery;
29 import org.apache.solr.client.solrj.SolrResponse;
30 import org.apache.solr.client.solrj.SolrServerException;
31 import org.apache.solr.client.solrj.impl.BaseHttpSolrClient;
32 import org.apache.solr.client.solrj.request.schema.FieldTypeDefinition;
33 import org.apache.solr.client.solrj.request.schema.SchemaRequest;
34 import org.apache.solr.client.solrj.response.QueryResponse;
35 import org.apache.solr.client.solrj.response.UpdateResponse;
36 import org.apache.solr.client.solrj.response.schema.SchemaResponse;
37 import org.apache.solr.common.SolrDocumentList;
38 import org.apache.solr.common.SolrException;
39 import org.geotools.api.data.Query;
40 import org.geotools.api.data.SimpleFeatureSource;
41 import org.geotools.api.feature.simple.SimpleFeature;
42 import org.geotools.data.simple.SimpleFeatureCollection;
43 import org.geotools.data.simple.SimpleFeatureIterator;
44 import org.locationtech.jts.geom.Geometry;
45 import org.slf4j.Logger;
46 import org.slf4j.LoggerFactory;
47 import org.springframework.lang.NonNull;
48 import org.springframework.lang.Nullable;
49 import org.tailormap.api.admin.model.SearchIndexSummary;
50 import org.tailormap.api.admin.model.TaskProgressEvent;
51 import org.tailormap.api.geotools.featuresources.FeatureSourceFactoryHelper;
52 import org.tailormap.api.geotools.processing.GeometryProcessor;
53 import org.tailormap.api.persistence.SearchIndex;
54 import org.tailormap.api.persistence.TMFeatureType;
55 import org.tailormap.api.repository.SearchIndexRepository;
56 import org.tailormap.api.scheduling.TaskType;
57 import org.tailormap.api.util.Constants;
58 import org.tailormap.api.viewer.model.SearchDocument;
59 import org.tailormap.api.viewer.model.SearchResponse;
60
61
62
63
64
65
66 public class SolrHelper implements AutoCloseable, Constants {
67 private static final Logger logger =
68 LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
69
70
71 private static final String SOLR_SPATIAL_FIELDNAME = "tm_geometry_rpt";
72
73 private final SolrClient solrClient;
74
75
76 private final Map<String, SchemaRequest.AddField> solrSearchFields = Map.of(
77 SEARCH_LAYER,
78 new SchemaRequest.AddField(Map.of(
79 "name", SEARCH_LAYER,
80 "type", "string",
81 "indexed", true,
82 "stored", true,
83 "multiValued", false,
84 "required", true,
85 "uninvertible", false)),
86 INDEX_GEOM_FIELD,
87 new SchemaRequest.AddField(
88 Map.of("name", INDEX_GEOM_FIELD, "type", SOLR_SPATIAL_FIELDNAME, "stored", true)),
89 INDEX_SEARCH_FIELD,
90 new SchemaRequest.AddField(Map.of(
91 "name", INDEX_SEARCH_FIELD,
92 "type", "text_general",
93 "indexed", true,
94 "stored", true,
95 "multiValued", true,
96 "required", true,
97 "uninvertible", false)),
98 INDEX_DISPLAY_FIELD,
99 new SchemaRequest.AddField(Map.of(
100 "name", INDEX_DISPLAY_FIELD,
101 "type", "text_general",
102 "indexed", false,
103 "stored", true,
104 "multiValued", true,
105 "required", true,
106 "uninvertible", false)));
107
108 private int solrQueryTimeout = 7000;
109 private int solrBatchSize = 1000;
110 private String solrGeometryValidationRule = "repairBuffer0";
111
112
113
114
115
116
117 public SolrHelper(@NotNull SolrClient solrClient) {
118 this.solrClient = solrClient;
119 }
120
121
122
123
124
125
126 public SolrHelper withQueryTimeout(
127 @Positive(message = "Must use a positive integer for query timeout") int solrQueryTimeout) {
128 this.solrQueryTimeout = solrQueryTimeout * 1000;
129 return this;
130 }
131
132
133
134
135
136
137 public SolrHelper withBatchSize(@Positive(message = "Must use a positive integer for batching") int solrBatchSize) {
138 this.solrBatchSize = solrBatchSize;
139 return this;
140 }
141
142
143
144
145
146
147
148
149 public SolrHelper withGeometryValidationRule(@NonNull String solrGeometryValidationRule) {
150 if (List.of("error", "none", "repairBuffer0", "repairConvexHull").contains(solrGeometryValidationRule)) {
151 logger.trace("Setting geometry validation rule for Solr geometry field to {}", solrGeometryValidationRule);
152 this.solrGeometryValidationRule = solrGeometryValidationRule;
153 }
154 return this;
155 }
156
157
158
159
160
161
162
163
164
165
166
167
168 @SuppressWarnings("FromTemporalAccessor")
169 public SearchIndex addFeatureTypeIndex(
170 @NotNull SearchIndex searchIndex,
171 @NotNull TMFeatureType tmFeatureType,
172 @NotNull FeatureSourceFactoryHelper featureSourceFactoryHelper,
173 @NotNull SearchIndexRepository searchIndexRepository)
174 throws IOException, SolrServerException {
175
176 Consumer<TaskProgressEvent> progressListener = (event) -> logger.debug("Progress event: {}", event);
177
178 return this.addFeatureTypeIndex(
179 searchIndex, tmFeatureType, featureSourceFactoryHelper, searchIndexRepository, progressListener, null);
180 }
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196 @SuppressWarnings("FromTemporalAccessor")
197 public SearchIndex addFeatureTypeIndex(
198 @NotNull SearchIndex searchIndex,
199 @NotNull TMFeatureType tmFeatureType,
200 @NotNull FeatureSourceFactoryHelper featureSourceFactoryHelper,
201 @NotNull SearchIndexRepository searchIndexRepository,
202 @NotNull Consumer<TaskProgressEvent> progressListener,
203 @Nullable UUID taskUuid)
204 throws IOException, SolrServerException {
205
206 createSchemaIfNotExists();
207
208 final Instant startedAt = Instant.now();
209 final OffsetDateTime startedAtOffset =
210 startedAt.atOffset(ZoneId.systemDefault().getRules().getOffset(startedAt));
211
212 if (null == taskUuid && null != searchIndex.getSchedule()) {
213
214
215
216 taskUuid = searchIndex.getSchedule().getUuid();
217 }
218
219 SearchIndexSummary summary =
220 new SearchIndexSummary().startedAt(startedAtOffset).total(0).duration(0.0);
221
222 if (null == searchIndex.getSearchFieldsUsed()) {
223 logger.warn("No search fields configured for search index: {}, bailing out.", searchIndex.getName());
224 return searchIndexRepository.save(searchIndex
225 .setStatus(SearchIndex.Status.ERROR)
226 .setSummary(summary.errorMessage("No search fields configured")));
227 }
228
229 TaskProgressEvent taskProgressEvent = new TaskProgressEvent()
230 .type(TaskType.INDEX.getValue())
231 .uuid(taskUuid)
232 .startedAt(startedAtOffset)
233 .progress(0)
234 .taskData(Map.of(INDEX_KEY, searchIndex.getId()));
235 progressListener.accept(taskProgressEvent);
236
237
238 List<String> searchFields = searchIndex.getSearchFieldsUsed().stream()
239 .filter(s -> !tmFeatureType.getSettings().getHideAttributes().contains(s))
240 .toList();
241 List<String> displayFields = searchIndex.getSearchDisplayFieldsUsed().stream()
242 .filter(s -> !tmFeatureType.getSettings().getHideAttributes().contains(s))
243 .toList();
244
245 if (searchFields.isEmpty()) {
246 logger.warn("No valid search fields configured for featuretype: {}, bailing out.", tmFeatureType.getName());
247 return searchIndexRepository.save(searchIndex
248 .setStatus(SearchIndex.Status.ERROR)
249 .setSummary(summary.errorMessage("No search fields configured")));
250 }
251
252
253 Set<String> propertyNames = new HashSet<>();
254
255 propertyNames.add(tmFeatureType.getPrimaryKeyAttribute());
256 propertyNames.add(tmFeatureType.getDefaultGeometryAttribute());
257 propertyNames.addAll(searchFields);
258
259 if (!displayFields.isEmpty()) {
260 propertyNames.addAll(displayFields);
261 }
262
263 clearIndexForLayer(searchIndex.getId());
264
265 logger.info(
266 "Indexing started for index id: {}, feature type: {}", searchIndex.getId(), tmFeatureType.getName());
267 searchIndex = searchIndexRepository.save(searchIndex.setStatus(SearchIndex.Status.INDEXING));
268
269
270 SimpleFeatureSource fs = featureSourceFactoryHelper.openGeoToolsFeatureSource(tmFeatureType);
271 Query q = new Query(fs.getName().toString());
272
273 tmFeatureType.getSettings().getHideAttributes().forEach(propertyNames::remove);
274 if (propertyNames.isEmpty()) {
275 logger.warn("No valid properties to index for featuretype: {}, bailing out.", tmFeatureType.getName());
276 return searchIndexRepository.save(searchIndex
277 .setStatus(SearchIndex.Status.ERROR)
278 .setSummary(summary.errorMessage("No valid properties to index")));
279 }
280 q.setPropertyNames(List.copyOf(propertyNames));
281 q.setStartIndex(0);
282
283
284 logger.trace("Indexing query: {}", q);
285 SimpleFeatureCollection simpleFeatureCollection = fs.getFeatures(q);
286 final int total = simpleFeatureCollection.size();
287 List<FeatureIndexingDocument> docsBatch = new ArrayList<>(solrBatchSize);
288
289
290 UpdateResponse updateResponse;
291 int indexCounter = 0;
292 int indexSkippedCounter = 0;
293 try (SimpleFeatureIterator iterator = simpleFeatureCollection.features()) {
294 while (iterator.hasNext()) {
295 indexCounter++;
296 SimpleFeature feature = iterator.next();
297
298 FeatureIndexingDocument doc = new FeatureIndexingDocument(feature.getID(), searchIndex.getId());
299 List<String> searchValues = new ArrayList<>();
300 List<String> displayValues = new ArrayList<>();
301 propertyNames.forEach(propertyName -> {
302 Object value = feature.getAttribute(propertyName);
303 if (value != null) {
304 if (value instanceof Geometry
305 && propertyName.equals(tmFeatureType.getDefaultGeometryAttribute())) {
306
307
308 doc.setGeometry(GeometryProcessor.processGeometry(value, true, true, null));
309 } else {
310 if (searchFields.contains(propertyName)) {
311 searchValues.add(value.toString());
312 }
313 if (displayFields.contains(propertyName)) {
314 displayValues.add(value.toString());
315 }
316 }
317 }
318 });
319 if (searchValues.isEmpty() || displayValues.isEmpty()) {
320
321 logger.trace(
322 "No search or display values found for feature: {} in featuretype: {}, skipped for indexing",
323 feature.getID(),
324 tmFeatureType.getName());
325 indexSkippedCounter++;
326 } else {
327 doc.setSearchFields(searchValues.toArray(new String[0]));
328 doc.setDisplayFields(displayValues.toArray(new String[0]));
329 docsBatch.add(doc);
330 }
331
332 if (indexCounter % solrBatchSize == 0) {
333 updateResponse = solrClient.addBeans(docsBatch, solrQueryTimeout);
334 logger.info(
335 "Added {} documents of {} to index, result status: {}",
336 indexCounter - indexSkippedCounter,
337 total,
338 updateResponse.getStatus());
339 progressListener.accept(
340 taskProgressEvent.total(total).progress((indexCounter - indexSkippedCounter)));
341 docsBatch.clear();
342 }
343 }
344 } finally {
345 if (fs.getDataStore() != null) fs.getDataStore().dispose();
346 }
347
348 if (!docsBatch.isEmpty()) {
349 solrClient.addBeans(docsBatch, solrQueryTimeout);
350 logger.info("Added last {} documents of {} to index", docsBatch.size(), total);
351 progressListener.accept(taskProgressEvent
352 .progress((indexCounter - indexSkippedCounter))
353 .total(total));
354 }
355 final Instant finishedAt = Instant.now();
356 final OffsetDateTime finishedAtOffset =
357 finishedAt.atOffset(ZoneId.systemDefault().getRules().getOffset(finishedAt));
358 Duration processTime = Duration.between(startedAt, finishedAt).abs();
359 logger.info(
360 "Indexing finished for index id: {}, featuretype: {} at {} in {}",
361 searchIndex.getId(),
362 tmFeatureType.getName(),
363 finishedAtOffset,
364 processTime);
365 updateResponse = this.solrClient.commit();
366 logger.trace("Update response commit status: {}", updateResponse.getStatus());
367
368 if (indexSkippedCounter > 0) {
369 logger.warn(
370 "{} features were skipped because no search or display values were found.", indexSkippedCounter);
371 }
372
373 return searchIndexRepository.save(searchIndex
374 .setLastIndexed(finishedAtOffset)
375 .setStatus(SearchIndex.Status.INDEXED)
376 .setSummary(summary.total(total)
377 .skippedCounter(indexSkippedCounter)
378 .duration(BigDecimal.valueOf(processTime.getSeconds())
379 .add(BigDecimal.valueOf(processTime.getNano(), 9))
380 .doubleValue())
381 .errorMessage(null)));
382 }
383
384
385
386
387
388
389
390
391 public void clearIndexForLayer(@NotNull Long searchLayerId) throws IOException, SolrServerException {
392
393 QueryResponse response =
394 solrClient.query(new SolrQuery("exists(query(" + SEARCH_LAYER + ":" + searchLayerId + "))"));
395 if (response.getResults().getNumFound() > 0) {
396 logger.info("Clearing index for searchLayer {}", searchLayerId);
397 UpdateResponse updateResponse = solrClient.deleteByQuery(SEARCH_LAYER + ":" + searchLayerId);
398 logger.trace("Delete response status: {}", updateResponse.getStatus());
399 updateResponse = solrClient.commit();
400 logger.trace("Commit response status: {}", updateResponse.getStatus());
401 } else {
402 logger.info("No index to clear for layer {}", searchLayerId);
403 }
404 }
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419 public SearchResponse findInIndex(
420 @NotNull SearchIndex searchIndex,
421 String solrQuery,
422 String solrFilterQuery,
423 String solrPoint,
424 Double solrDistance,
425 int start,
426 int numResultsToReturn)
427 throws IOException, SolrServerException, SolrException {
428
429 if (null == solrQuery || solrQuery.isBlank()) {
430 solrQuery = "*";
431 }
432
433 logger.info("Query index for '{}' in {} (id {})", solrQuery, searchIndex.getName(), searchIndex.getId());
434
435
436
437
438
439 final SolrQuery query = new SolrQuery(INDEX_SEARCH_FIELD + ":" + solrQuery)
440 .setShowDebugInfo(logger.isDebugEnabled())
441 .setTimeAllowed(solrQueryTimeout)
442 .setIncludeScore(true)
443 .setFields(SEARCH_ID_FIELD, INDEX_DISPLAY_FIELD, INDEX_GEOM_FIELD)
444 .addFilterQuery(SEARCH_LAYER + ":" + searchIndex.getId())
445 .setSort("score", SolrQuery.ORDER.desc)
446 .addSort(SEARCH_ID_FIELD, SolrQuery.ORDER.asc)
447 .setRows(numResultsToReturn)
448 .setStart(start);
449
450 if (null != solrFilterQuery && !solrFilterQuery.isBlank()) {
451 query.addFilterQuery(solrFilterQuery);
452 }
453 if (null != solrPoint && null != solrDistance) {
454 if (null == solrFilterQuery
455 || !(solrFilterQuery.startsWith("{!geofilt") || solrFilterQuery.startsWith("{!bbox"))) {
456 query.addFilterQuery("{!geofilt sfield=" + INDEX_GEOM_FIELD + "}");
457 }
458 query.add("pt", solrPoint);
459 query.add("d", solrDistance.toString());
460 }
461 query.set("q.op", "AND");
462 logger.info("Solr query: {}", query);
463
464 final QueryResponse response = solrClient.query(query);
465 logger.trace("response: {}", response);
466
467 final SolrDocumentList solrDocumentList = response.getResults();
468 logger.debug("Found {} solr documents", solrDocumentList.getNumFound());
469 final SearchResponse searchResponse = new SearchResponse()
470 .total(solrDocumentList.getNumFound())
471 .start(response.getResults().getStart())
472 .maxScore(solrDocumentList.getMaxScore());
473 response.getResults().forEach(solrDocument -> {
474 List<String> displayValues = solrDocument.getFieldValues(INDEX_DISPLAY_FIELD).stream()
475 .map(Object::toString)
476 .toList();
477 searchResponse.addDocumentsItem(new SearchDocument()
478 .fid(solrDocument.getFieldValue(SEARCH_ID_FIELD).toString())
479 .geometry(solrDocument.getFieldValue(INDEX_GEOM_FIELD).toString())
480 .displayValues(displayValues));
481 });
482
483 return searchResponse;
484 }
485
486
487
488
489
490
491 @Override
492 public void close() throws IOException {
493 if (null != this.solrClient) this.solrClient.close();
494 }
495
496 private boolean checkSchemaIfFieldExists(String fieldName) {
497 SchemaRequest.Field fieldCheck = new SchemaRequest.Field(fieldName);
498 try {
499 SchemaResponse.FieldResponse isField = fieldCheck.process(solrClient);
500 logger.debug("Field {} exists", isField.getField());
501 return true;
502 } catch (SolrServerException | BaseHttpSolrClient.RemoteSolrException e) {
503 logger.debug("Field {} does not exist or could not be retrieved. Assuming it does not exist.", fieldName);
504 } catch (IOException e) {
505 logger.error("Tried getting field: {}, but failed.", fieldName, e);
506 }
507 return false;
508 }
509
510
511
512
513
514
515 private void createSchemaFieldIfNotExists(String fieldName) throws SolrServerException, IOException {
516 if (!checkSchemaIfFieldExists(fieldName)) {
517 logger.info("Creating Solr field {}.", fieldName);
518 SchemaRequest.AddField schemaRequest = solrSearchFields.get(fieldName);
519 SolrResponse response = schemaRequest.process(solrClient);
520 logger.debug("Field type {} created", response);
521 solrClient.commit();
522 }
523 }
524
525
526 private void createSchemaIfNotExists() {
527 solrSearchFields.forEach((key, value) -> {
528 try {
529 if (key.equals(INDEX_GEOM_FIELD)) {
530 createGeometryFieldTypeIfNotExists();
531 }
532 createSchemaFieldIfNotExists(key);
533 } catch (SolrServerException | IOException e) {
534 logger.error(
535 "Error creating schema field: {} indexing may fail. Details: {}",
536 key,
537 e.getLocalizedMessage(),
538 e);
539 }
540 });
541 }
542
543 private void createGeometryFieldTypeIfNotExists() throws SolrServerException, IOException {
544 SchemaRequest.FieldType fieldTypeCheck = new SchemaRequest.FieldType(SOLR_SPATIAL_FIELDNAME);
545 try {
546 SchemaResponse.FieldTypeResponse isFieldType = fieldTypeCheck.process(solrClient);
547 logger.debug("Field type {} exists", isFieldType.getFieldType());
548 return;
549 } catch (SolrServerException | BaseHttpSolrClient.RemoteSolrException e) {
550 logger.debug(
551 "Field type {} does not exist or could not be retrieved. Assuming it does not exist.",
552 SOLR_SPATIAL_FIELDNAME);
553 } catch (IOException e) {
554 logger.error("Tried getting field type: {}, but failed.", SOLR_SPATIAL_FIELDNAME, e);
555 }
556
557 logger.info(
558 "Creating Solr field type for {} with validation rule {}",
559 SOLR_SPATIAL_FIELDNAME,
560 solrGeometryValidationRule);
561 FieldTypeDefinition spatialFieldTypeDef = new FieldTypeDefinition();
562 Map<String, Object> spatialFieldAttributes = new HashMap<>(Map.of(
563 "name", SOLR_SPATIAL_FIELDNAME,
564 "class", "solr.SpatialRecursivePrefixTreeFieldType",
565 "spatialContextFactory", "JTS",
566 "geo", false,
567 "distanceUnits", "kilometers",
568 "distCalculator", "cartesian",
569 "format", "WKT",
570 "autoIndex", true,
571 "distErrPct", "0.025",
572 "maxDistErr", "0.001"));
573 spatialFieldAttributes.putAll(Map.of(
574 "prefixTree",
575 "packedQuad",
576
577
578 "validationRule",
579 this.solrGeometryValidationRule,
580
581
582 "worldBounds",
583
584 "ENVELOPE(-20037508.34, 20037508.34, 20048966.1, -20048966.1)"
585
586
587 ));
588 spatialFieldTypeDef.setAttributes(spatialFieldAttributes);
589 SchemaRequest.AddFieldType spatialFieldType = new SchemaRequest.AddFieldType(spatialFieldTypeDef);
590 spatialFieldType.process(solrClient);
591 solrClient.commit();
592 }
593 }