The Perfect Full Text Search in Lucene.Net for Sitecore AdvancedDatabaseCrawler


	   /// <summary>
        /// Builds a composite full-text query out of several alternative (SHOULD) clauses, each with
        /// its own boost: an exact phrase on the "title" field (1.8), an exact phrase on
        /// <paramref name="defaultField"/> (1.6), three parsed variants of the noise-stripped query
        /// (default field 1.5, keyword-analyzed title 1.7, title 1.4) and one term clause (1.9) per
        /// configured "Super Boost Words" entry that occurs in the original query.
        /// </summary>
        /// <param name="query">Raw user search text; must not be null or empty.</param>
        /// <param name="defaultField">Index field used for the default-field clauses; must not be null or empty.</param>
        /// <returns>A <see cref="BooleanQuery"/> combining all clauses with SHOULD.</returns>
        /// <remarks>
        /// A <c>ParseException</c> raised by the query parser is not handled here and propagates to the caller.
        /// </remarks>
        protected virtual Query FullTextQueryParse(string query, string defaultField)
        {
            Assert.ArgumentNotNullOrEmpty(query, "query");
            Assert.ArgumentNotNullOrEmpty(defaultField, "defaultField");

            char[] spaceDelimiter = { ' ' };
            char[] listDelimiters = { ',', ':', '|' };

            // The settings item only tunes the query. If the database or the item cannot be
            // resolved, fall back to "no noise words / no boost words" instead of failing the search.
            Item luceneSettingItem = null;
            var contentDatabase = SiteContext.Current.GetContentDatabase();
            if (contentDatabase != null)
            {
                luceneSettingItem = contentDatabase.GetItem(Helper.Constants.Items.LuceneSettingItemId);
            }

            string[] noiseWords = ReadWordList(luceneSettingItem, "Noise Words", listDelimiters);
            string[] superBoostWords = ReadWordList(luceneSettingItem, "Super Boost Words", listDelimiters);

            BooleanQuery booleanQuery = new BooleanQuery();
            string[] queryTerms = query.Split(spaceDelimiter, StringSplitOptions.RemoveEmptyEntries);

            // Exact-phrase clauses are built from the unfiltered query terms.
            AddShouldClause(booleanQuery, BuildPhraseQuery("title", queryTerms, 1.8f));
            AddShouldClause(booleanQuery, BuildPhraseQuery(defaultField, queryTerms, 1.6f));

            // Noise and short-token filtering is only applied when noise words are configured.
            string originalQuery = query;
            if (noiseWords.Any())
            {
                query = StripNoiseWords(query, noiseWords, spaceDelimiter);
            }

            string escapedQuery = QueryParser.Escape(query);

            QueryParser queryParser = new QueryParser(defaultField, _analyzer);
            queryParser.SetDefaultOperator(QueryParser.Operator.AND);
            AddParsedClause(booleanQuery, queryParser, escapedQuery, 1.5f);

            queryParser = new QueryParser(Helper.Constants.BuiltinFields.Title, new KeywordAnalyzer());
            queryParser.SetDefaultOperator(QueryParser.Operator.AND);
            AddParsedClause(booleanQuery, queryParser, escapedQuery, 1.7f);

            // NOTE(review): unlike the two parsers above, this one keeps the parser's default
            // operator - confirm that this is intentional.
            queryParser = new QueryParser(Helper.Constants.BuiltinFields.Title, _analyzer);
            AddParsedClause(booleanQuery, queryParser, escapedQuery, 1.4f);

            // Boost words are matched against the query as the user typed it, before noise stripping.
            foreach (string superBoostWord in superBoostWords)
            {
                if (!originalQuery.Contains(superBoostWord))
                {
                    continue;
                }

                // NOTE(review): the escaped text is used directly as the term text and is not run
                // through a query parser - confirm the escaping is wanted here.
                TermQuery termQuery = new TermQuery(new Term("title", QueryParser.Escape(superBoostWord)));
                termQuery.SetBoost(1.9f);
                AddShouldClause(booleanQuery, termQuery);
            }

            return booleanQuery;
        }

        /// <summary>
        /// Reads a delimiter-separated word list from a field of the settings item.
        /// Entries are trimmed and whitespace-only entries are dropped.
        /// </summary>
        /// <returns>The configured words, or an empty array when the item is null or the field has no value.</returns>
        private static string[] ReadWordList(Item settingsItem, string fieldName, char[] delimiters)
        {
            if (settingsItem == null || !settingsItem[fieldName].IsNotNullOrNotEmpty())
            {
                return new string[0];
            }

            return settingsItem.Fields[fieldName].Value
                .Split(delimiters, StringSplitOptions.RemoveEmptyEntries)
                .Select(word => word.Trim())
                .Where(word => word.Length > 0)
                .ToArray();
        }

        /// <summary>
        /// Removes noise words and tokens shorter than three characters from the query.
        /// Only whole whitespace-delimited tokens (or token sequences, for multi-word noise entries)
        /// are removed, so a noise word never eats part of a longer word.
        /// </summary>
        /// <returns>The filtered query, or the unmodified <paramref name="query"/> when filtering would leave nothing.</returns>
        private static string StripNoiseWords(string query, string[] noiseWords, char[] tokenDelimiter)
        {
            string stripped = query;
            foreach (string noiseWord in noiseWords)
            {
                // Match the noise word only when it is bounded by whitespace or the string edges.
                string wholeTokenPattern = @"(?<!\S)"
                    + System.Text.RegularExpressions.Regex.Escape(noiseWord)
                    + @"(?!\S)";
                stripped = System.Text.RegularExpressions.Regex.Replace(stripped, wholeTokenPattern, " ");
            }

            stripped = stripped.Trim();
            if (stripped.Length == 0)
            {
                return query;
            }

            string[] significantTokens = stripped
                .Split(tokenDelimiter, StringSplitOptions.RemoveEmptyEntries)
                .Where(token => token.Length >= 3)
                .ToArray();

            if (significantTokens.Length == 0)
            {
                return query;
            }

            return string.Join(" ", significantTokens);
        }

        /// <summary>
        /// Creates a zero-slop phrase query over the given terms on one field, with the given boost.
        /// </summary>
        private PhraseQuery BuildPhraseQuery(string field, string[] terms, float boost)
        {
            PhraseQuery phraseQuery = new PhraseQuery();
            foreach (string term in terms)
            {
                phraseQuery.Add(new Term(field, Escape(term.Trim())));
            }

            phraseQuery.SetBoost(boost);
            phraseQuery.SetSlop(0);
            return phraseQuery;
        }

        /// <summary>
        /// Parses the already-escaped query text with the given parser and, when the parser returns
        /// a query, boosts it and adds it to <paramref name="target"/> as an optional clause.
        /// </summary>
        private static void AddParsedClause(BooleanQuery target, QueryParser parser, string escapedQuery, float boost)
        {
            Query parsedQuery = parser.Parse(escapedQuery);
            if (parsedQuery == null)
            {
                return;
            }

            parsedQuery.SetBoost(boost);
            AddShouldClause(target, parsedQuery);
        }

        /// <summary>
        /// Wraps the clause in its own BooleanQuery (as MUST) and adds that wrapper to
        /// <paramref name="target"/> as a SHOULD clause.
        /// </summary>
        private static void AddShouldClause(BooleanQuery target, Query clause)
        {
            BooleanQuery wrapper = new BooleanQuery();
            wrapper.Add(clause, BooleanClause.Occur.MUST);
            target.Add(wrapper, BooleanClause.Occur.SHOULD);
        }
		
Advertisements

One thought on “The Perfect Full Text Search in Lucene.Net for Sitecore AdvancedDatabaseCrawler”

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s