Commit 59dab60b authored by Tobinsk's avatar Tobinsk
Browse files

Merge branch 'fix-missing-matching-info' into 'master'

If we merge two or more concordances, combine all the matching information

Closes #207

See merge request metagrid-go/metagrid-go!189
parents 7742915a 6c299bd3
......@@ -87,7 +87,7 @@ push:
stage: build
script:
- docker pull $CI_REGISTRY_IMAGE:latest || true
- docker build --pull -t $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA .
- docker build --cache-from --pull -t $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA .
- docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
# push latest on branch master
......
......@@ -2,7 +2,13 @@
curl -X DELETE localhost:9200/metagrid -H "content-type: Application/json"
curl -X DELETE localhost:9200/matches -H "content-type: Application/json"
curl -X DELETE localhost:9200/crawl -H "content-type: Application/json"
curl -X PUT localhost:9200/metagrid -d @./01-metagrid-mapping.json -H "content-type: Application/json"
curl -X PUT localhost:9200/matches -d @./01-metagrid-matches-mapping.json -H "content-type: Application/json"
curl -X PUT localhost:9200/crawl -d @./01-metagrid-crawl-mapping.json -H "content-type: Application/json"
curl -X DELETE localhost:9200/test -H "content-type: Application/json"
curl -X DELETE localhost:9200/.kibana_task_manager_1 -H "content-type: Application/json"
curl -X DELETE localhost:9200/.apm-agent-configuration -H "content-type: Application/json"
curl -X DELETE localhost:9200/matches-test -H "content-type: Application/json"
curl -X DELETE localhost:9200/crawl-test -H "content-type: Application/json"
curl -X DELETE localhost:9200/.kibana_1 -H "content-type: Application/json"
#curl -X PUT localhost:9200/metagrid -d @./01-metagrid-mapping.json -H "content-type: Application/json"
#curl -X PUT localhost:9200/matches -d @./01-metagrid-matches-mapping.json -H "content-type: Application/json"
#curl -X PUT localhost:9200/crawl -d @./01-metagrid-crawl-mapping.json -H "content-type: Application/json"
# curl -X POST localhost:9200/metagrid/_doc -d @./02-metagrid-sample-data.json -H "content-type: Application/json"
......@@ -23,11 +23,6 @@ import (
log "github.com/sirupsen/logrus"
)
const (
// TestSize is the number of concordance we try to match in the test
TestSize = 200
)
// Service struct
type Merger struct {
storage Storage
......@@ -132,11 +127,18 @@ func (m *Merger) InMemory(concordances ...Concordance) (Concordance, error) {
if con.User == "" {
con.User = concordance.User
}
// append matching information. We make them later unique
con.Matches = append(con.Matches, concordance.Matches...)
con.RefusedMatches = append(con.RefusedMatches, concordance.RefusedMatches...)
// merge persons
con.Persons = append(con.Persons, concordance.Persons...)
}
// check constraints of persons
var err error
con.Persons, err = m.runChecks(unique(con.Persons))
// make matching infos unique
con.Matches = uniqueStrings(con.Matches)
con.RefusedMatches = uniqueStrings(con.RefusedMatches)
if err != nil {
return Concordance{}, err
}
......@@ -208,6 +210,19 @@ func unique(persons []Person) []Person {
return p
}
// uniqueStrings returns the distinct values of the given slice, keeping the
// order in which each value first appears.
//
// The result is always a non-nil slice; for empty or nil input it has length 0.
// The input slice is not modified.
func uniqueStrings(values []string) []string {
	seen := make(map[string]struct{}, len(values))
	// Allocate with length 0 and only reserve capacity: appending to a slice
	// created with a non-zero length would leave leading empty strings in the
	// result.
	result := make([]string, 0, len(values))
	for _, v := range values {
		if _, ok := seen[v]; ok {
			continue
		}
		seen[v] = struct{}{}
		// Append while walking the input (not the map) so the output order is
		// deterministic instead of following random map iteration order.
		result = append(result, v)
	}
	return result
}
// informationQuality defines a metric for how good the information is. We decide on the basis of empty fields
// and the quality of the dates. To be honest, this is not very accurate.
func informationQuality(person Person) float64 {
......
......@@ -103,6 +103,7 @@ func TestService_Merge(t *testing.T) {
ForwardID: "",
Persons: persons,
RefusedMatches: nil,
Matches: []string{"Match3"},
}
// mock founded match
......@@ -113,6 +114,8 @@ func TestService_Merge(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, len(c3.Persons), 3)
assert.Contains(t, c3.Persons, person)
assert.Contains(t, c3.Matches, c2.Matches[0])
assert.Contains(t, c3.RefusedMatches, c1.RefusedMatches[0])
}
func TestService_MergeId(t *testing.T) {
......@@ -188,7 +191,8 @@ func mockConcordance() Concordance {
User: "Sacha",
ForwardID: "",
Persons: persons,
RefusedMatches: nil,
RefusedMatches: []string{"Match10"},
Matches: []string{"Match1", "Match2"},
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment