Commit ce3fda11 authored by Tobias Steiner

Load the needed properties from Wikidata

parent 1ef24865
import { queryTaxonomy } from 'wikidata-taxonomy';
export interface InterfaceProperty {
  name: string;
  qid: string;
}
export class BootstrapWikidata {
  /**
   * The properties loaded from Wikidata, each with its expanded QID list
   * @type {Array<{name: string, properties: string[]}>}
   */
  private properties: Array<{name: string, properties: string[]}> = [];
  /**
   * Load a list of properties with their subproperties from Wikidata
   * @param {InterfaceProperty[]} properties
   * @returns {Promise<void>}
   */
  public async init(properties: InterfaceProperty[]): Promise<void> {
    // Array.prototype.forEach ignores the promises returned by an async
    // callback, so use Promise.all to actually wait for every lookup
    await Promise.all(properties.map(async (property) => {
      const subproperties = await this.getProperties(property.qid);
      // add the main property to the list
      subproperties.push(property.qid);
      this.properties.push({name: property.name, properties: subproperties});
    }));
  }
  /**
   * Get the collected QIDs (property plus subproperties) for a named property
   * @param {string} name
   * @returns {string[]}
   */
  public get(name: string): string[] {
    const property = this.properties.find((p) => {
      return p.name === name;
    });
    // guard against unknown names instead of throwing on undefined
    return property ? property.properties : [];
  }
  /**
   * Get a property with its subproperties from Wikidata
   * @param {string} property
   * @returns {Promise<string[]>}
   */
  private async getProperties(property: string): Promise<string[]> {
    // P1647 = "subproperty of"
    return queryTaxonomy(property, {brief: true, property: ['P1647']})
      .then((taxonomy) => {
        const properties: string[] = [];
        taxonomy.concepts.forEach((concept) => {
          const qid = concept.notation[0];
          properties.push(qid);
        });
        // deduplicate the collected QIDs
        return properties.filter((d, i, a) => {
          return a.indexOf(d) === i;
        });
      })
      .catch((error) => {
        console.log('error while collecting QIDs');
        console.log(error);
        // return an empty list so init() can still register the main QID
        return [];
      });
  }
}
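
For reference, a minimal usage sketch of the class above (not part of the commit; the property name 'start' mirrors the index.ts change below):

const wikidata = new BootstrapWikidata();
// P580 = "start time"; getProperties() expands it through P1647
wikidata.init([{name: 'start', qid: 'P580'}]).then(() => {
  // yields [...subproperty QIDs, 'P580'] as collected during init()
  console.log(wikidata.get('start'));
});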
@@ -4,7 +4,7 @@ import ReporterCounter from 'geolinker-common/dist/stream/reporter-counter';
import * as KafkaAvro from 'kafka-avro';
import nodeCleanup = require('node-cleanup');
import ProducerStream = require('node-rdkafka/lib/producer-stream');
import Debug from './debug';
import {BootstrapWikidata, InterfaceProperty} from './bootstrap-wikidata';
import StreamProducerPreparer from './geolinker-stream-preparer';
import WikiNormalizer from './wiki-normalizer';
@@ -36,6 +36,17 @@ nodeCleanup( (exitCode, signal) => {
}, 100);
});
/**
* A list of properties to load from wikidata
* @type {InterfaceProperty[]}
*/
const properties = [
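  // P580 = "start time", P582 = "end time" on Wikidata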
{name: 'start', qid: 'P580' },
{name: 'end', qid: 'P582' },
];
const wikidataProperties = new BootstrapWikidata();
const wpPromise = wikidataProperties.init(properties);
/**
* Init KafkaInstance
* @type {}
@@ -45,6 +56,7 @@ kafka.init().then(() => {
Promise.all([
kafka.getProducer(),
kafka.getConsumerStream(),
wpPromise,
])
.then((streams) => {
core.getLogger().info('Producer and consumer are ready to go');
@@ -56,7 +68,7 @@ kafka.init().then(() => {
*/
consumerStream
.pipe(new ReporterCounter({ objectMode: true}, core.getReporter(), 'read'))
.pipe(new WikiNormalizer({objectMode: true}, core.getLogger()))
.pipe(new WikiNormalizer({objectMode: true}, core.getLogger(), wikidataProperties))
.pipe(new ReporterCounter({ objectMode: true}, core.getReporter(), 'write'))
.pipe(new StreamProducerPreparer({ objectMode: true, topic: core.getNconf().get('producer:topics'), partition: -1 }))
.pipe(new ProducerStream(producer, {objectMode: true, topic: core.getNconf().get('producer:topics')}));
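
wiki-normalizer.ts itself is not part of this diff; the following is a hypothetical sketch of how the new third constructor argument could be consumed, assuming a standard Node Transform stream and a chunk whose claims object is keyed by property ID (both assumptions, not taken from this commit):

import { Transform, TransformCallback } from 'stream';
import { BootstrapWikidata } from './bootstrap-wikidata';

// Hypothetical sketch only; the real WikiNormalizer is not shown here.
class WikiNormalizerSketch extends Transform {
  constructor(options: object,
              private logger: any,
              private wikidataProperties: BootstrapWikidata) {
    super(options);
  }

  public _transform(chunk: any, encoding: string, callback: TransformCallback): void {
    // 'start' resolves to P580 plus every subproperty fetched during init()
    const startQids = this.wikidataProperties.get('start');
    // flag the record when any claim uses P580 or one of its subproperties
    // (chunk.claims and chunk.hasStart are assumed names for illustration)
    chunk.hasStart = startQids.some((qid) => chunk.claims && qid in chunk.claims);
    this.push(chunk);
    callback();
  }
}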