Hey all, I have a fairly odd case of duplicate documents in
our Solr index
(see attached XML sample). The index is roughly 35k
documents. The only
way I've found to fix the problem is to run a delete
statement by id, which
deletes both; I can then re-index that one document. This
happened
previously, but it ended up being an issue with
case-sensitivity; this
time the ids appear identical!
Any assistance in tracking this down would be appreciated! I
can provide any
other logs if necessary.
Thanks,
Dan
Sample Select Query:
<?xml version="1.0"
encoding="UTF-8" ?>
- <response>
- <lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">0</int>
</lst>
- <result name="response"
numFound="2" start="0">
- <doc>
- <arr name="categoryId">
<int>151</int>
<int>962</int>
<int>1493</int>
<int>1830</int>
</arr>
- <arr name="finish">
<str>N/A</str>
</arr>
<bool
name="hasDigiCast">false</bool>
<bool
name="hasDigiVista">false</bool>
<str
name="id">hr-802waclighting</str>
- <arr name="inStock">
<bool>false</bool>
</arr>
<bool name="isNew">false</bool>
<bool name="isTopSeller">true</bool>
<str name="manufacturer">wac
lighting</str>
- <arr name="masterFinish">
<str>not applicable</str>
</arr>
<date
name="modifiedDate">2007-10-15T23:10:01.510Z</date>
<bool name="onSale">false</bool>
<int name="popularity">1683</int>
- <arr name="price">
<float>53.91</float>
</arr>
<date
name="productAddDate">2007-07-05T00:00:00Z</date>
<str name="productID">HR-802</str>
<str name="productTitle">Low Voltage
Miniature Housing for Recessed
Lighting Fixture</str>
<str name="series">low voltage miniature
housings</str>
- <arr name="sku">
<str />
</arr>
<str name="theme" />
- <arr name="upc">
<str />
</arr>
</doc>
- <doc>
- <arr name="categoryId">
<int>151</int>
<int>962</int>
<int>1493</int>
<int>1830</int>
</arr>
- <arr name="finish">
<str>N/A</str>
</arr>
<bool
name="hasDigiCast">false</bool>
<bool
name="hasDigiVista">false</bool>
<str
name="id">hr-802waclighting</str>
- <arr name="inStock">
<bool>false</bool>
</arr>
<bool name="isNew">false</bool>
<bool name="isTopSeller">true</bool>
<str name="manufacturer">wac
lighting</str>
- <arr name="masterFinish">
<str>not applicable</str>
</arr>
<date
name="modifiedDate">2007-11-02T15:33:21.154Z</date>
<bool name="onSale">false</bool>
<int name="popularity">1683</int>
- <arr name="price">
<float>53.91</float>
</arr>
<date
name="productAddDate">2007-07-05T00:00:00Z</date>
<str name="productID">HR-802</str>
<str name="productTitle">Low Voltage
Miniature Housing for Recessed
Lighting Fixture</str>
<str name="series">low voltage miniature
housings</str>
- <arr name="sku">
<str />
</arr>
<str name="theme" />
- <arr name="upc">
<str />
</arr>
</doc>
</result>
</response>
Schema.xml
<field name="id" type="string"
indexed="true" stored="true"/>
<field name="sku" type="textTight"
indexed="true" stored="true"
multiValued="true"/>
<field name="upc" type="textTight"
indexed="true" stored="true"
multiValued="true"/>
.....
<!-- field to use to determine and enforce document
uniqueness. -->
<uniqueKey>id</uniqueKey>
<!-- field for the QueryParser to use when an explicit
fieldname is absent
-->
<defaultSearchField>text</defaultSearchField>
<!-- SolrQueryParser configuration:
defaultOperator="AND|OR"
-->
<solrQueryParser defaultOperator="OR"/>
--
View this message in context:
http://www.nabble.com/SOLR-1.2---Duplicate-Documents---tf4762687.html#a13621332
Sent from the Solr - User mailing list archive at
Nabble.com.
|