/* * SPDX-License-Identifier: Apache-2.0 * * The OpenSearch Contributors require contributions made to * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ /* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ /* * Modifications Copyright OpenSearch Contributors. See * GitHub history for details. */ package org.opensearch.common.lucene.search; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.queries.SpanMatchNoDocsQuery; import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.queries.spans.SpanOrQuery; import org.apache.lucene.queries.spans.SpanQuery; import org.apache.lucene.queries.spans.SpanTermQuery; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import java.io.IOException; import java.util.Collection; import java.util.HashSet; import java.util.Set; /** * A span rewrite method that extracts the first maxExpansions terms * that match the {@link MultiTermQuery} in the terms dictionary. * The rewrite throws an error if more than maxExpansions terms are found and hardLimit * is set. * * @opensearch.internal */ public class SpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod { private final int maxExpansions; private final boolean hardLimit; public SpanBooleanQueryRewriteWithMaxClause() { this(BooleanQuery.getMaxClauseCount(), true); } public SpanBooleanQueryRewriteWithMaxClause(int maxExpansions, boolean hardLimit) { this.maxExpansions = maxExpansions; this.hardLimit = hardLimit; } public int getMaxExpansions() { return maxExpansions; } public boolean isHardLimit() { return hardLimit; } @Override public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException { final MultiTermQuery.RewriteMethod delegate = new MultiTermQuery.RewriteMethod() { @Override public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { Collection queries = collectTerms(reader, query); if (queries.size() == 0) { return new SpanMatchNoDocsQuery(query.getField(), "no expansion found for " + query.toString()); } else if (queries.size() == 1) { return queries.iterator().next(); } else { return new SpanOrQuery(queries.toArray(new SpanQuery[0])); } } private Collection collectTerms(IndexReader reader, MultiTermQuery query) throws IOException { Set queries = new HashSet<>(); IndexReaderContext topReaderContext = reader.getContext(); for (LeafReaderContext context : topReaderContext.leaves()) { final Terms terms = context.reader().terms(query.getField()); if (terms == null) { // field does not exist continue; } final TermsEnum termsEnum = getTermsEnum(query, terms, new AttributeSource()); assert termsEnum != null; if (termsEnum == TermsEnum.EMPTY) { continue; } BytesRef bytes; while ((bytes = termsEnum.next()) != null) { if (queries.size() >= maxExpansions) { if (hardLimit) { throw new RuntimeException( "[" + query.toString() + " ] " + "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]" ); } else { return queries; } } queries.add(new SpanTermQuery(new Term(query.getField(), bytes))); } } return queries; } }; return (SpanQuery) delegate.rewrite(reader, query); } }