close

利用 Lucene.Net 建完索引後,如果想檢視程式到底如何建立索引,可以利用現成 API 來檢查索引裡的 Term。所謂的 Term,用一個例子來說明就是,假設有一個句子“This is a book.”,用 Lucene.Net 建完索引後,正常情況下會將這個句子拆成“This”、“is”、“a”、“book”然後再存進檔案裡。當然,如果有過濾 stop word,那就只有“book”會被記錄。

接下來示範如何列出索引裡的 Term。

        /// <summary>
        /// Builds a fresh index at <c>IndexPath</c> containing one document per product.
        /// Each document has the analyzed text fields "SaleName" and "Desc" (stored, with
        /// term vectors) and the stored, indexed numeric field "Price".
        /// </summary>
        /// <param name="ProductList">Products to index; each one becomes a single document.</param>
        public void StartIndex(List<Product> ProductList)
        {
            DirectoryInfo dirInfo = new DirectoryInfo(this.IndexPath);
            FSDirectory dir = FSDirectory.Open(dirInfo);
            try
            {
                // create == true: any index already present at this location is overwritten.
                IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29), true, IndexWriter.MaxFieldLength.UNLIMITED);
                try
                {
                    foreach (Product product in ProductList)
                    {
                        Document doc = new Document();
                        Field field = new Field("SaleName", product.SaleName, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES);
                        Field field2 = new Field("Desc", product.Desc, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES);
                        NumericField field3 = new NumericField("Price", Field.Store.YES, true);
                        field3.SetIntValue(product.Price);

                        doc.Add(field);
                        doc.Add(field2);
                        doc.Add(field3);
                        indexWriter.AddDocument(doc);
                    }
                }
                finally
                {
                    // Always close the writer so the index write lock is released,
                    // even when adding a document throws.
                    indexWriter.Close();
                }
            }
            finally
            {
                // The writer does not own the directory it was handed, so close it here.
                dir.Close();
            }
        }

上面的程式碼示範如何建立索引,這裡只需注意,宣告 Field 物件時,要設定 Field.TermVector.YES 這個屬性。

接下來列出索引裡的 Term。

        /// <summary>
        /// Lists the terms in the index at <c>IndexPath</c>, starting from the first
        /// term of the "SaleName" field.
        /// </summary>
        /// <returns>
        /// One string per term, formatted as "field:text", in the order the index
        /// enumerates them. Empty when no term is at or after the start position.
        /// </returns>
        public List<string> ListTerms()
        {
            DirectoryInfo dirInfo = new DirectoryInfo(this.IndexPath);
            FSDirectory dir = FSDirectory.Open(dirInfo);
            IndexReader reader = null;
            TermEnum termEnum = null;
            List<string> list = new List<string>();

            try
            {
                reader = IndexReader.Open(dir, true);
                Term t = new Term("SaleName");
                termEnum = reader.Terms(t);

                // Terms(Term) returns an enumeration that is ALREADY positioned on the
                // first term >= t, so the current term must be read before Next() is
                // called; a plain while (Next()) loop would silently drop that term.
                do
                {
                    Term term = termEnum.Term();
                    if (term == null)
                    {
                        // No term at the current position: the enumeration is exhausted.
                        break;
                    }

                    list.Add(term.Field() + ":" + term.Text());
                }
                while (termEnum.Next());
            }
            finally
            {
                // Release index resources in reverse order of acquisition.
                if (termEnum != null)
                {
                    termEnum.Close();
                }

                if (reader != null)
                {
                    reader.Close();
                }

                dir.Close();
            }

            return list;
        }

程式碼並沒有特別難,只要注意 term 這個物件是否為 null 就可以了。

然後是 main function。

        /// <summary>
        /// Demo entry point: indexes a handful of sample products into "INDEX_DIR",
        /// then prints every term string returned by <c>ListTerms</c> to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Sample catalogue mixing English and Chinese product names.
            List<Product> products = new List<Product>
            {
                new Product { SaleName = "iphone", Desc = "Apple's iphone", Price = 100 },
                new Product { SaleName = "iphone4s", Desc = "Apple's iphone", Price = 120 },
                new Product { SaleName = "iphone 4s", Desc = "Apple's iphone", Price = 130 },
                new Product { SaleName = "iphone 3gs", Desc = "Apple's iphone", Price = 90 },
                new Product { SaleName = "愛瘋機", Desc = "中國出品", Price = 100 },
                new Product { SaleName = "蘋果機", Desc = "中國出品", Price = 100 },
                new Product { SaleName = "HTC Hero", Desc = "Android Phone", Price = 50 }
            };

            LuceneClass indexer = new LuceneClass(@"INDEX_DIR");
            indexer.StartIndex(products);

            // Print each term on its own line.
            List<string> terms = indexer.ListTerms();
            for (int i = 0; i < terms.Count; i++)
            {
                Console.WriteLine(terms[i]);
            }
        }

執行結果

2012-08-05_152430

arrow
arrow
    全站熱搜

    卑微研究生 發表在 痞客邦 留言(0) 人氣()