Commit f012c80f authored by tobinski's avatar tobinski
Browse files

Refactor the utils

parent 1a04ff67
/**
 * Contract of a store that keeps lists of strings under a name
 */
export interface IPropertyCollection {
  /**
   * Look up the list stored under a name
   * @param {string} name
   * @returns {string[]}
   */
  get(name: string): string[];
  /**
   * Store a list under a name
   * @param {string} name
   * @param {string[]} subclasses
   * @returns {void}
   */
  add(name: string, subclasses: string[]): void;
}
/**
 * Simple class to manage a collection of properties.
 * Every entry pairs a name with the list of subclasses added for it.
 */
export class PropertyCollection implements IPropertyCollection {
  /**
   * The entries in the order they were added
   * @type {Array<{name: string, subclasses: string[]}>}
   */
  private properties: Array<{name: string; subclasses: string[]}> = [];

  /**
   * Get a collection of subclasses for a named property
   * @param {string} name
   * @returns {string[]} the subclasses of the first entry added under this name,
   * or an empty array if nothing was added under this name
   */
  public get(name: string): string[] {
    const property = this.properties.find((p) => p.name === name);
    // find() yields undefined for an unknown name; reading .subclasses from it
    // would throw a TypeError, so answer with an empty list instead
    return property === undefined ? [] : property.subclasses;
  }

  /**
   * Add a collection of subclasses for a named property
   * @param {string} name
   * @param {string[]} subclasses
   * @returns {void}
   */
  public add(name: string, subclasses: string[]): void {
    this.properties.push({name, subclasses});
  }
}
import {queryTaxonomy} from 'wikidata-taxonomy';
import {PropertyCollection} from './property-collection';
/**
 * A wikidata property together with the name its subclasses are stored under
 */
export interface IProperty {
  /** Wikidata id of the property, f.e. P580 */
  qid: string;
  /** Name used to store and look up the subclasses of the property */
  name: string;
}
export class WikidataProperties {
  /**
   * Query the taxonomy of a property on wikidata and collect the first
   * notation of every concept in the answer
   * @param {string} property
   * @param {{}} query options handed to queryTaxonomy, f.e. property: ['P1647'] (P1647 = subproperty)
   * @returns {Promise<any>} the collected ids without duplicates, or an empty array if anything fails
   */
  public static async getProperties(property: string, query: any = {brief: true}) {
    try {
      const {concepts} = await queryTaxonomy(property, query);
      const qids: any[] = concepts.map((concept) => concept.notation[0]);
      // keep an id only at the position of its first occurrence
      return qids.filter((qid, position) => qids.indexOf(qid) === position);
    } catch (error) {
      console.log('Error while collecting Q-numbers from wikidata');
      console.log(error);
      return [];
    }
  }

  /**
   * Build a collection holding, for every given property, its subclasses
   * from wikidata followed by the property itself
   * @param {IProperty[]} props
   * @returns {Promise<PropertyCollection>}
   */
  public static async init(props: IProperty[]) {
    const collection = new PropertyCollection();
    // one lookup after the other, each awaited before the next starts
    for (const {name, qid} of props) {
      const subclasses = await WikidataProperties.getProperties(qid, {brief: true, property: ['P1647']});
      // the main property goes to the end of its own list
      collection.add(name, [...subclasses, qid]);
    }
    return collection;
  }
}
......@@ -22,7 +22,7 @@ export interface ILink {
class WikidataAnalyserTransformer extends Transform {
/**
* Properties to extract from wiki data structure from lena
* PropertyCollection to extract from wiki data structure from lena
* todo: get this dynamic
* @type {string[]}
*/
......
......@@ -3,7 +3,8 @@ import * as geohash from 'ngeohash';
import * as ramda from 'ramda';
import {Transform} from 'stream';
import {LoggerInstance} from 'winston';
import {timeToDate, WikidataProperties} from './wikidata-utils';
import {IPropertyCollection} from './utils/property-collection';
import {timeToDate} from './wikidata-utils';
class WikidataNormalizerTransformer extends Transform {
/**
......@@ -13,11 +14,11 @@ class WikidataNormalizerTransformer extends Transform {
private languages = ['de', 'en', 'fr', 'it'];
/**
* Properties describe the end of a geo instance.
* PropertyCollection describe the end of a geo instance.
* We fetch them from wikidata
* @type {null}
* @type {IPropertyCollection}
*/
private properties: WikidataProperties;
private properties: IPropertyCollection;
/**
* Logger
......@@ -27,7 +28,7 @@ class WikidataNormalizerTransformer extends Transform {
constructor(options) {
  // options is handed to the base class unchanged and also read for the two fields below
  super(options);
  this.logger = options.logger;
  // Only the renamed option is read. The former assignment from
  // options.wikidataPropertiesInstance was overwritten right away and had no effect.
  this.properties = options.wikidataPropertyCollection;
}
/**
......
import { queryTaxonomy } from 'wikidata-taxonomy';
/**
 * A wikidata property together with the name its subclasses are stored under
 */
export interface IProperty {
  /** Wikidata id of the property, f.e. P580 */
  qid: string;
  /** Name used to store and look up the subclasses of the property */
  name: string;
}
/**
 * A named list of wikidata ids, the shape init() stores
 */
interface INamedProperties {
  name: string;
  properties: string[];
}

export class WikidataProperties {
  /**
   * Query the taxonomy of a property on wikidata and collect the first
   * notation of every concept in the answer
   * @param {string} property
   * @param {{}} query options handed to queryTaxonomy, f.e. property: ['P1647'] (P1647 = subproperty)
   * @returns {Promise<any>} the collected ids without duplicates, or an empty array if anything fails
   */
  public static async getProperties(property: string, query: any = {brief: true}) {
    try {
      const taxonomy = await queryTaxonomy(property, query);
      // NOTE(review): assumes every notation is a string id - confirm against wikidata-taxonomy
      const qids: string[] = taxonomy.concepts.map((concept) => concept.notation[0]);
      // a Set keeps the first occurrence of every id and preserves insertion order
      return Array.from(new Set(qids));
    } catch (error) {
      console.log('Error while collecting Q-numbers from wikidata');
      console.log(error);
      return [];
    }
  }

  /**
   * Everything stored so far: named lists pushed by init() and
   * plain strings appended by setProperties()
   * @type {Array<INamedProperties | string>}
   */
  private properties: Array<INamedProperties | string> = [];

  /**
   * Load a list of properties with subclasses from wikidata
   * @param {IProperty[]} props
   * @returns {Promise<void>}
   */
  public async init(props: IProperty[]): Promise<void> {
    for (const property of props) {
      const subclasses = await WikidataProperties.getProperties(property.qid, {brief: true, property: ['P1647']});
      // add the main property to the list
      subclasses.push(property.qid);
      this.properties.push({name: property.name, properties: subclasses});
    }
  }

  /**
   * Get a collection of subclasses for a named property
   * @param {string} name
   * @returns {string[]} the list of the first entry stored under this name,
   * or an empty array if no entry carries this name
   */
  public get(name: string): string[] {
    const entry = this.properties.find(
      (p): p is INamedProperties => typeof p !== 'string' && p.name === name,
    );
    // an unknown name used to end in a TypeError on undefined; answer with an empty list
    return entry === undefined ? [] : entry.properties;
  }

  /**
   * set a list of properties f.e. manual collected data
   * @param props
   */
  public setProperties(props: string[]) {
    // NOTE(review): plain strings carry no name, so get() never returns them - confirm the intended shape
    this.properties = [...this.properties, ...props];
  }
}
import {IProperty, WikidataProperties} from './utils/wikidata-properties';
// The properties whose subclasses are loaded from wikidata
const properties: IProperty[] = [
  {name: 'start', qid: 'P580'},
  {name: 'end', qid: 'P582'},
];
/**
 * Grab all the subclasses of start and end properties from wikidata.
 * We want to find all possible annotations. The promise returned by
 * WikidataProperties.init is exported as is, so a failure of init rejects
 * the loader instead of leaving a wrapping promise pending forever.
 */
export const wikidataTimePropertiesLoader = WikidataProperties.init(properties);
/**
* Get all subclasses of geographical entities from wikidata (Q2221906 = geographic location)
......
import chai = require('chai');
import chaiAsPromised = require('chai-as-promised');
import fs = require('fs');
import Core, {InterfaceCore} from 'geolinker-common/dist/core';
import ReporterCounter from 'geolinker-common/dist/stream/reporter-counter';
......@@ -24,11 +23,10 @@ let wikidataTimeProperties;
describe('Test WikiNormalizer', () => {
before(function(done) {
this.timeout(5000);
this.timeout(10000);
chai.should();
chai.use(chaiAsPromised);
wikidataTimePropertiesLoader.then((instance) => {
wikidataTimeProperties = instance;
wikidataTimePropertiesLoader.then((propertyCollection) => {
wikidataTimeProperties = propertyCollection;
done();
});
});
......@@ -37,7 +35,7 @@ describe('Test WikiNormalizer', () => {
const options = {
objectMode: true,
logger: coreMock.getLogger(),
wikidataPropertiesInstance: wikidataTimeProperties,
wikidataPropertyCollection: wikidataTimeProperties,
};
const normalizer = new WikidataNormalizerTransformer(options);
......@@ -56,13 +54,14 @@ describe('Test WikiNormalizer', () => {
},
};
normalizer.write(chunck);
normalizer.end();
});
it('Try to transform in a pipline', (done) => {
const options = {
objectMode: true,
logger: coreMock.getLogger(),
wikidataPropertiesInstance: wikidataTimeProperties,
wikidataPropertyCollection: wikidataTimeProperties,
};
const normalizer = new WikidataNormalizerTransformer(options);
const mockReporter = new MockReporter();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment