记录lucene.net的使用过程

之前公司要做一个信息展示的网站，领导说要用lucene.net来实现全文检索，类似百度的搜索功能，但是本人技术有限，只是基本实现搜索和高亮功能，特此记录；
先看下页面效果，首先我搜索“为什么APP消息没有推送”，出来的结果如下图：
然后我再搜索“醒消息推”，出来结果如下图：
然后说下，我使用的Lucene.net版本是2.9.22，盘古分词的版本是2.3.1。注意，Lucene.net和盘古分词的版本一定要对应上：之前我用Lucene.net 3.0的版本时一直报错，后来换成低版本才没有问题；
接着是关键的类LuceneHelper，如下所示：
 /// <summary>
 /// Singleton helper around a Lucene.Net 2.9 index stored on disk. It builds the index from
 /// arbitrary objects via reflection, runs keyword searches over the "Title" and "Contents"
 /// fields (with PanGu keyword highlighting) and deletes documents.
 /// </summary>
 public class LuceneHelper
 {
     /// <summary>Name of the stored identifier field as read back by the Search methods.</summary>
     private const string IdFieldName = "ID";

     /// <summary>
     /// Upper bound on the hits fetched by the unpaged <c>Search(string)</c> overload.
     /// NOTE(review): the original literal was lost from the published listing; 1000 is a
     /// reconstruction - adjust to the value the application really needs.
     /// </summary>
     private const int MaxUnpagedHits = 1000;

     /// <summary>
     /// Properties indexed verbatim (NOT_ANALYZED). Matching is ordinal and case-insensitive so the
     /// result does not depend on the current culture (e.g. Turkish "ID".ToLower() != "id").
     /// </summary>
     private static readonly HashSet<string> NotAnalyzedFields =
         new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "id", "type", "IsNewest", "IsReqular" };

     /// <summary>Identifier field spellings tried by <see cref="Delete"/>; field names are case-sensitive.</summary>
     private static readonly string[] IdFieldNames = { IdFieldName, "Id", "id" };

     private static readonly System.Text.RegularExpressions.Regex HtmlTagPattern =
         new System.Text.RegularExpressions.Regex(@"<[^>]+>", System.Text.RegularExpressions.RegexOptions.Compiled);

     // Only well-formed named / decimal / hexadecimal entities, so that a bare '&' in ordinary
     // text no longer swallows everything up to the next ';'.
     private static readonly System.Text.RegularExpressions.Regex HtmlEntityPattern =
         new System.Text.RegularExpressions.Regex(@"&(#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);", System.Text.RegularExpressions.RegexOptions.Compiled);

     private static readonly System.Text.RegularExpressions.Regex WhitespacePattern =
         new System.Text.RegularExpressions.Regex(@"\s+", System.Text.RegularExpressions.RegexOptions.Compiled);

     readonly LogHelper _logHelper = new LogHelper(MethodBase.GetCurrentMethod());

     private LuceneHelper() { }

     #region Singleton
     private static LuceneHelper _instance = null;
     private static readonly object Lock = new object();

     /// <summary>
     /// The single shared instance. PanGu is initialised from <see cref="PanGuXmlPath"/> the first
     /// time the instance is requested.
     /// </summary>
     public static LuceneHelper instance
     {
         get
         {
             lock (Lock)
             {
                 if (_instance == null)
                 {
                     // Initialise PanGu before publishing the instance: if initialisation throws,
                     // the next access retries instead of handing out a half-configured helper.
                     PanGu.Segment.Init(PanGuXmlPath);
                     _instance = new LuceneHelper();
                 }
                 return _instance;
             }
         }
     }
     #endregion

     #region Keyword segmentation
     /// <summary>
     /// Segments the keyword with PanGu and joins the resulting words with single spaces so they
     /// can be handed to the query parser.
     /// </summary>
     /// <param name="keywords">Raw user input; null or empty yields an empty string.</param>
     /// <returns>Space separated, query-syntax-escaped words.</returns>
     private string GetKeyWordsSplitBySpace(string keywords)
     {
         if (string.IsNullOrEmpty(keywords))
         {
             return string.Empty;
         }

         PanGuTokenizer tokenizer = new PanGuTokenizer();
         ICollection<WordInfo> words = tokenizer.SegmentToWordInfos(keywords);
         if (words == null)
         {
             return string.Empty;
         }

         StringBuilder result = new StringBuilder();
         foreach (WordInfo word in words)
         {
             if (word == null || string.IsNullOrEmpty(word.Word))
             {
                 continue;
             }
             // Escape query-syntax characters ('?', ':', '(' ...) so punctuation typed by the user
             // cannot make QueryParser.Parse throw.
             result.Append(QueryParser.Escape(word.Word)).Append(' ');
         }
         return result.ToString().Trim();
     }
     #endregion

     #region Index creation
     /// <summary>
     /// Rebuilds the whole index from <paramref name="datalist"/>; existing index content is replaced.
     /// </summary>
     /// <param name="datalist">Objects to index; every public property becomes a field.</param>
     /// <returns>
     /// true when the index was rebuilt; false when <paramref name="datalist"/> is null (the
     /// existing index is then left untouched).
     /// </returns>
     public bool CreateIndex<T>(IList<T> datalist)
     {
         if (datalist == null)
         {
             return false;
         }

         // create == true: start from an empty index rather than appending.
         IndexWriter writer = new IndexWriter(directory_luce, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
         try
         {
             foreach (T data in datalist)
             {
                 CreateIndex<T>(writer, data);
             }
             writer.Optimize();
             writer.Close();
         }
         catch
         {
             // Discard the partial rebuild and release the index write lock; otherwise later
             // writers cannot open the directory.
             writer.Rollback();
             throw;
         }
         return true;
     }

     /// <summary>
     /// Adds one document to <paramref name="writer"/>, with one stored field per public property
     /// of <paramref name="data"/>.
     /// </summary>
     /// <param name="writer">Open index writer; it is not closed by this method.</param>
     /// <param name="data">Object to index.</param>
     /// <returns>false when <paramref name="data"/> is null, otherwise true.</returns>
     public bool CreateIndex<T>(IndexWriter writer, T data)
     {
         if (data == null)
         {
             return false;
         }

         Document doc = new Document();
         foreach (PropertyInfo property in data.GetType().GetProperties())
         {
             string name = property.Name;
             object raw = property.GetValue(data, null);
             string value = raw == null ? "" : raw.ToString();

             if (NotAnalyzedFields.Contains(name))
             {
                 // Identifiers and flags must stay single terms so exact-match filters and
                 // deletes work on them.
                 doc.Add(new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
                 continue;
             }

             if (string.Equals(name, "Contents", StringComparison.OrdinalIgnoreCase))
             {
                 value = GetNoHtml(value); // index the body text without markup
             }
             doc.Add(new Field(name, value, Field.Store.YES, Field.Index.ANALYZED));
         }
         writer.AddDocument(doc);
         return true;
     }
     #endregion

     #region Unpaged search over Title and Contents
     /// <summary>
     /// Searches the "Title" and "Contents" fields for <paramref name="keyword"/>, which is passed
     /// to the query parser as-is (query syntax allowed).
     /// </summary>
     /// <param name="keyword">Query text.</param>
     /// <returns>The matching questions with highlighted title/contents, or null when nothing matches.</returns>
     public List<Questions> Search(string keyword)
     {
         string[] fields = { "Title", "Contents" };
         QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);
         Query query = parser.Parse(keyword);

         IndexSearcher searcher = new IndexSearcher(directory_luce, true); // true = read-only
         try
         {
             TopDocs docs = searcher.Search(query, (Filter)null, MaxUnpagedHits);
             if (docs == null || docs.totalHits == 0)
             {
                 return null;
             }

             PanGu.HighLight.Highlighter highlighter = CreateHighlighter("<font color=\"red\">", "</font>");
             List<Questions> list = new List<Questions>();
             foreach (ScoreDoc hit in docs.scoreDocs)
             {
                 Questions model = TryBuildResult(searcher, hit, highlighter, keyword);
                 if (model != null)
                 {
                     list.Add(model);
                 }
             }
             return list;
         }
         finally
         {
             searcher.Close(); // release the index files held by the reader
         }
     }
     #endregion

     #region Paged search with category / flag filters
     /// <summary>
     /// Searches "Title" and "Contents" for <paramref name="keyword"/>, optionally restricted by
     /// category and flags, and returns only the rows of the requested page.
     /// </summary>
     /// <param name="_type">Exact "Type" value; null, empty or "-100" means all categories.</param>
     /// <param name="_isnew">Exact "IsNewest" value; null means no restriction.</param>
     /// <param name="_isreq">Exact "IsReqular" value; null means no restriction.</param>
     /// <param name="keyword">User input; it is segmented with PanGu and the words are OR-ed.</param>
     /// <param name="PageIndex">1-based page number; values below 1 are treated as 1.</param>
     /// <param name="PageSize">Rows per page.</param>
     /// <param name="TotalCount">Total number of matching documents; 0 on no match or error.</param>
     /// <returns>The rows of the requested page, or null when nothing matches or the search fails.</returns>
     public List<Questions> Search(string _type, bool? _isnew, bool? _isreq, string keyword, int PageIndex, int PageSize, out int TotalCount)
     {
         TotalCount = 0;
         IndexSearcher searcher = null;
         try
         {
             if (PageIndex < 1)
             {
                 PageIndex = 1;
             }

             BooleanQuery bq = new BooleanQuery();

             // The filter fields are indexed NOT_ANALYZED, so they are matched with exact TermQuery
             // clauses. Running them through QueryParser would lower-case "True"/"False" and
             // interpret a leading '-' as a prohibited clause.
             if (!string.IsNullOrEmpty(_type) && _type != "-100")
             {
                 bq.Add(new TermQuery(new Term("Type", _type)), Lucene.Net.Search.BooleanClause.Occur.MUST);
             }
             if (_isnew.HasValue)
             {
                 bq.Add(new TermQuery(new Term("IsNewest", _isnew.Value.ToString())), Lucene.Net.Search.BooleanClause.Occur.MUST);
             }
             if (_isreq.HasValue)
             {
                 bq.Add(new TermQuery(new Term("IsReqular", _isreq.Value.ToString())), Lucene.Net.Search.BooleanClause.Occur.MUST);
             }

             string highlightKeyword = keyword; // highlight with the text exactly as the user typed it
             if (!string.IsNullOrEmpty(keyword))
             {
                 string segmented = GetKeyWordsSplitBySpace(keyword);
                 if (segmented.Length > 0)
                 {
                     string[] fields = { "Title", "Contents" };
                     QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);
                     // OR: a document matching any of the words qualifies (fuzzy); AND would
                     // require every word (strict).
                     parser.SetDefaultOperator(QueryParser.Operator.OR);
                     bq.Add(parser.Parse(segmented), Lucene.Net.Search.BooleanClause.Occur.MUST);
                 }
             }

             searcher = new IndexSearcher(directory_luce, true); // true = read-only
             // Fetch everything up to the end of the requested page, then slice the page out below.
             TopDocs topDocs = searcher.Search(bq, null, PageIndex * PageSize);
             if (topDocs == null || topDocs.totalHits == 0)
             {
                 return null;
             }

             TotalCount = topDocs.totalHits;
             ScoreDoc[] hits = topDocs.scoreDocs;
             int start = PageSize * (PageIndex - 1);

             // FragmentSize is set to Int32.MaxValue inside CreateHighlighter so the highlighted
             // text is never truncated.
             PanGu.HighLight.Highlighter highlighter = CreateHighlighter("<em class=\"hl-l-t-main\">", "</em>");
             List<Questions> list = new List<Questions>();
             for (int i = start; i < hits.Length; i++)
             {
                 Questions model = TryBuildResult(searcher, hits[i], highlighter, highlightKeyword);
                 if (model != null)
                 {
                     list.Add(model);
                 }
             }
             return list;
         }
         catch (Exception e)
         {
             System.Diagnostics.Trace.TraceError("LuceneHelper.Search failed: {0}", e);
             TotalCount = 0;
             return null;
         }
         finally
         {
             if (searcher != null)
             {
                 searcher.Close();
             }
         }
     }

     /// <summary>Creates a PanGu highlighter that wraps matches in the given tags and never truncates.</summary>
     private static PanGu.HighLight.Highlighter CreateHighlighter(string openTag, string closeTag)
     {
         PanGu.HighLight.SimpleHTMLFormatter formatter = new PanGu.HighLight.SimpleHTMLFormatter(openTag, closeTag);
         PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(formatter, new PanGu.Segment());
         highlighter.FragmentSize = Int32.MaxValue;
         return highlighter;
     }

     /// <summary>Returns the highlighted form of <paramref name="text"/>, or the text itself when nothing was highlighted.</summary>
     private static string Highlight(PanGu.HighLight.Highlighter highlighter, string keyword, string text)
     {
         if (string.IsNullOrEmpty(keyword) || string.IsNullOrEmpty(text))
         {
             return text;
         }
         string fragment = highlighter.GetBestFragment(keyword, text);
         return string.IsNullOrEmpty(fragment) ? text : fragment;
     }

     /// <summary>
     /// Loads one hit and converts it to a <see cref="Questions"/>. A hit that cannot be converted
     /// (for example an unparsable ID or AddTime) is logged and skipped by returning null, so one
     /// bad document does not fail the whole search.
     /// </summary>
     private static Questions TryBuildResult(IndexSearcher searcher, ScoreDoc hit, PanGu.HighLight.Highlighter highlighter, string highlightKeyword)
     {
         try
         {
             Document doc = searcher.Doc(hit.doc);
             return new Questions
             {
                 ID = int.Parse(doc.Get(IdFieldName)),
                 Title = Highlight(highlighter, highlightKeyword, doc.Get("Title")),
                 Contents = Highlight(highlighter, highlightKeyword, doc.Get("Contents")),
                 AddTime = DateTime.Parse(doc.Get("AddTime"))
             };
         }
         catch (Exception ex)
         {
             System.Diagnostics.Trace.TraceWarning("LuceneHelper: skipped search hit {0}: {1}", hit.doc, ex);
             return null;
         }
     }

     /// <summary>
     /// Converts an HTML fragment to plain text: tags and character entities are removed and every
     /// run of whitespace is collapsed to a single space, as a browser would render it.
     /// </summary>
     /// <param name="StrHtml">HTML text; may be null or empty.</param>
     /// <returns>The plain text; an empty string for null or empty input.</returns>
     public string GetNoHtml(string StrHtml)
     {
         if (string.IsNullOrEmpty(StrHtml))
         {
             return "";
         }

         string text = HtmlTagPattern.Replace(StrHtml, "");
         text = HtmlEntityPattern.Replace(text, "");
         // The previous pattern @"\\s*|\t|\r|\n" matched a literal backslash rather than
         // whitespace. Collapsing to one space (instead of deleting) keeps Latin words that were
         // separated only by a line break from being glued together before analysis.
         return WhitespacePattern.Replace(text, " ").Trim();
     }
     #endregion

     #region Delete by id
     /// <summary>
     /// Deletes the documents whose identifier field equals <paramref name="id"/>.
     /// </summary>
     /// <param name="id">Identifier value exactly as it was indexed.</param>
     /// <returns>
     /// Whether the index contains deleted documents after the commit. This is the writer's
     /// HasDeletions() value, so it can also be true because of earlier deletions.
     /// </returns>
     public bool Delete(string id)
     {
         IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
         try
         {
             // The field is named after the indexed property ("ID" for Questions) and Lucene field
             // names are case-sensitive, so the usual spellings are all tried.
             foreach (string fieldName in IdFieldNames)
             {
                 writer.DeleteDocuments(new Term(fieldName, id));
             }
             writer.Commit();
             return writer.HasDeletions();
         }
         finally
         {
             writer.Close(); // always release the index write lock
         }
     }
     #endregion

     #region Delete everything
     /// <summary>
     /// Removes every document from the index.
     /// </summary>
     /// <returns>true when the index was cleared; false when any step failed.</returns>
     public bool DeleteAll()
     {
         try
         {
             IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
             try
             {
                 writer.DeleteAll();
                 writer.Commit();
                 writer.Optimize();
             }
             finally
             {
                 writer.Close();
             }
             // Success is "no exception": HasDeletions() is false once the index has been emptied
             // and optimized, so it cannot be used as the success flag here.
             return true;
         }
         catch (Exception ex)
         {
             System.Diagnostics.Trace.TraceError("LuceneHelper.DeleteAll failed: {0}", ex);
             return false;
         }
     }
     #endregion

     #region directory_luce
     private Lucene.Net.Store.Directory _directory_luce = null;

     /// <summary>
     /// Lucene directory opened over <see cref="directory"/>; created on first use and then reused.
     /// </summary>
     public Lucene.Net.Store.Directory directory_luce
     {
         get
         {
             if (_directory_luce == null)
             {
                 _directory_luce = Lucene.Net.Store.FSDirectory.Open(directory);
             }
             return _directory_luce;
         }
     }
     #endregion

     #region directory
     private System.IO.DirectoryInfo _directory = null;

     /// <summary>
     /// Folder on disk that holds the index ("/LuceneDic" mapped through the current HTTP
     /// context). The folder is created when it does not exist yet.
     /// </summary>
     public System.IO.DirectoryInfo directory
     {
         get
         {
             if (_directory == null)
             {
                 // NOTE(review): relies on HttpContext.Current, so the first access presumably has
                 // to happen while a web request is being processed - confirm for background jobs.
                 string dirPath = HttpContext.Current.Server.MapPath("/LuceneDic");
                 _directory = System.IO.Directory.Exists(dirPath)
                     ? new System.IO.DirectoryInfo(dirPath)
                     : System.IO.Directory.CreateDirectory(dirPath);
             }
             return _directory;
         }
     }
     #endregion

     #region analyzer
     private Analyzer _analyzer = null;

     /// <summary>
     /// Analyzer used for both indexing and query parsing: Lucene's StandardAnalyzer. The PanGu
     /// analyzer is deliberately not used here; PanGu only segments keywords and highlights.
     /// </summary>
     public Analyzer analyzer
     {
         get
         {
             // Built once and reused instead of allocating a new analyzer on every access.
             if (_analyzer == null)
             {
                 _analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
             }
             return _analyzer;
         }
     }
     #endregion

     #region version
     private static Lucene.Net.Util.Version _version = Lucene.Net.Util.Version.LUCENE_29;

     /// <summary>
     /// Lucene compatibility version passed to the query parsers.
     /// </summary>
     public Lucene.Net.Util.Version version
     {
         get
         {
             return _version;
         }
     }
     #endregion

     /// <summary>
     /// Physical path of the PanGu configuration file ("/PanGu/PanGu.xml" mapped through the
     /// current HTTP context).
     /// </summary>
     protected static string PanGuXmlPath
     {
         get
         {
             return HttpContext.Current.Server.MapPath("/PanGu/PanGu.xml");
         }
     }
 }