Commit b88d1042 authored by Tobinsk's avatar Tobinsk
Browse files

Merge branch '6-wrong-topic' into 'master'

Resolve "Wrong topic"

Closes #6

See merge request histhub/wikidata-normalizer!12
parents 9a93e376 9c32e522
......@@ -838,6 +838,16 @@
"verror": "1.10.0"
}
},
"julian-gregorian": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/julian-gregorian/-/julian-gregorian-1.0.0.tgz",
"integrity": "sha512-zhZg0a1ErOR3FkZIF7mjJ39ZKf4aNFkU3/3/TexT2GD3z3f1fOcrVdP6Lwdz0x5PZg9gMDsuWsroX+9oFaHrqg=="
},
"juliandate": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/juliandate/-/juliandate-1.0.0.tgz",
"integrity": "sha1-ogzwYmTfD1RTJ2nylXv2GzJQVwE="
},
"just-extend": {
"version": "1.1.27",
"resolved": "https://registry.npmjs.org/just-extend/-/just-extend-1.1.27.tgz",
......@@ -1045,6 +1055,11 @@
"integrity": "sha1-PCV/mDn8DpP/UxSWMiOeuQeD/2Y=",
"optional": true
},
"momentjs": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/momentjs/-/momentjs-2.0.0.tgz",
"integrity": "sha1-c9+QS0+kGPbjxgXoMc727VUY69Q="
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
......
......@@ -22,6 +22,9 @@
"chai-as-promised": "^7.1.1",
"concurrently": "^3.5.1",
"geolinker-common": "git+https://gitlab+deploy-token-1:vnsdCm_t84QGVA2U4kw4@source.dodis.ch/histhub/geolinker-common.git",
"julian-gregorian": "^1.0.0",
"juliandate": "^1.0.0",
"momentjs": "^2.0.0",
"node-cleanup": "^2.1.2",
"node-rdkafka": "^2.3.3",
"ramda": "^0.25.0",
......
import converter from 'julian-gregorian';
import * as ramda from 'ramda';
import {Transform} from 'stream';
import {LoggerInstance} from 'winston';
import {BootstrapWikidata} from './bootstrap-wikidata';
import toDateObject from './wikidata_time_to_date_object';
class WikiNormalizer extends Transform {
/**
......@@ -81,9 +83,12 @@ class WikiNormalizer extends Transform {
*/
private getEnd(obj) {
let time = null;
let calendar = null;
this.properties.get('end').some((property) => {
time = this.getValue(obj, property, 'time');
if (typeof time !== 'undefined') {
calendar = this.getValue(obj, property, 'calendarmodel');
time = this.convertTimestamp(time, calendar);
return true;
} else {
time = null;
......@@ -100,9 +105,12 @@ class WikiNormalizer extends Transform {
private getStart(obj) {
// get all startdates P580
let time = null;
let calendar = null;
this.properties.get('start').some((property) => {
time = this.getValue(obj, property, 'time');
if (typeof time !== 'undefined') {
calendar = this.getValue(obj, property, 'calendarmodel');
time = this.convertTimestamp(time, calendar);
return true;
} else {
time = null;
......@@ -111,6 +119,29 @@ class WikiNormalizer extends Transform {
return time;
}
/**
 * Convert a wikidata date string into a unix timestamp (seconds since epoch).
 *
 * Wikidata uses both the julian and the gregorian calendar, so julian dates are
 * converted to gregorian first to put every timestamp on a common calendar.
 * More info: https://www.wikidata.org/wiki/Help:Dates
 *   Q1985727 = gregorian calendar
 *   Q1985786 = julian calendar
 *
 * @param {string} time wikidata time string, e.g. '+1838-01-01T00:00:00Z'
 * @param {string} calendar string containing the calendar model item id; a missing
 *   value is treated as "not julian"
 * @returns {number} seconds since epoch, or undefined when time is null
 */
private convertTimestamp(time: string, calendar: string): number | undefined {
  if (time === null) {
    return undefined;
  }
  let date: Date = toDateObject(time);
  // Guard against a claim without calendar model: calling indexOf on null/undefined would throw
  const isJulian = typeof calendar === 'string' && calendar.indexOf('Q1985786') > -1;
  if (isJulian) {
    // Date#getMonth() is zero-based (January === 0); the converter is given the
    // calendar month number, so add one instead of subtracting.
    // NOTE(review): the getters below read local time while the input string is UTC ('Z');
    // confirm the service runs with TZ=UTC or switch to the getUTC* getters.
    date = new Date(converter.fromJulianToGregorian(date.getFullYear(), date.getMonth() + 1, date.getDate()));
  }
  // get timestamp (seconds from start)
  return date.getTime() / 1000;
}
/**
* get all ids of the instanceOf
* @param obj
......
/**
* This module was adapted from
* https://github.com/maxlath/wikidata-sdk/blob/master/lib/helpers/wikidata_time_to_date_object.js
*/
/**
 * Object form of a wikidata time value: the time string is carried in the
 * `time` property.
 */
interface InterfaceWikidataTime {
// signed wikidata time string; its first character is read as the sign
time: string;
}
/**
 * Turn a wikidata time value into a Date.
 *
 * Accepts either the raw signed time string or an object that carries the
 * string in its `time` property (a claim datavalue.value object). When the
 * full string does not parse, the result is built from the year alone.
 */
const timeToDate = (wikidataTime: string | InterfaceWikidataTime) => {
  // Unwrap claim datavalue.value objects
  const raw = typeof wikidataTime === 'object' ? wikidataTime.time : wikidataTime;
  // First character is the sign, the remainder is the date itself
  const sign = raw[0];
  const rest = raw.slice(1);
  const parsed = fullDateData(sign, rest);
  return parsed.toString() === 'Invalid Date' ? parseInvalideDate(sign, rest) : parsed;
};
/**
 * Dispatch on the sign: '-' builds a negative-year date, anything else a
 * positive one.
 */
const fullDateData = (sign, rest) => {
  if (sign === '-') {
    return negativeDate(rest);
  }
  return positiveDate(rest);
};
/** Build a Date directly from the unsigned date string. */
const positiveDate = (rest) => {
  return new Date(rest);
};
/**
 * Build a Date for a negative year from the unsigned date string.
 *
 * The year is written in ISO8601 expanded notation: 4-digit years get two
 * leading zeros, 5-digit years get one, any other length is only prefixed
 * with the minus sign.
 */
const negativeDate = (rest) => {
  const yearDigits = rest.split('-')[0].length;
  let padding = '';
  if (yearDigits === 4) {
    padding = '00';
  } else if (yearDigits === 5) {
    padding = '0';
  }
  return new Date(`-${padding}${rest}`);
};
/**
 * Fallback for strings that did not parse, most likely dates of insufficient
 * precision such as 1953-00-00T00:00:00Z. Such a string should at least hold a
 * year, so the date is rebuilt from the year alone.
 */
const parseInvalideDate = (sign, rest) => {
  // Drop the time portion, then keep only the leading year segment
  const datePart = rest.split('T')[0];
  const year = datePart.split('-')[0];
  return fullDateData(sign, year);
};
export default timeToDate;
......@@ -3,13 +3,15 @@ import chaiAsPromised = require('chai-as-promised');
import fs = require('fs');
import Core, {InterfaceCore} from 'geolinker-common/dist/core';
import ReporterCounter from 'geolinker-common/dist/stream/reporter-counter';
import converter from 'julian-gregorian';
import {describe, it} from 'mocha';
import nconf = require('nconf');
import {BootstrapWikidata} from '../src/bootstrap-wikidata';
import WikiNormalizer from '../src/wiki-normalizer';
import toDateObject = require('../src/wikidata_time_to_date_object.js');
import MockReadable from './mock-readable';
import MockReporter from './mock-reporter';
import MockWriteable from './mock-writeable';
import {BootstrapWikidata} from '../src/bootstrap-wikidata';
process.env['log-dir'] = '/tmp';
process.env.reporter_url = 'localhost:9999';
nconf.set('log-dir', './log');
......@@ -105,4 +107,20 @@ describe('Test WikiNormalizer', () => {
done();
});
});
// Checks that wikidata time strings with positive and negative years parse to the
// expected epoch seconds, and that a julian date maps to its gregorian equivalent.
it('Transform dates', (done) => {
const time: Date = toDateObject('+1838-01-01T00:00:00Z');
const time1: Date = toDateObject('-1838-01-01T00:00:00Z');
// test conversion of positive and negative dates
// (getTime() is in milliseconds, the expected values are in seconds)
(time.getTime() / 1000).should.be.eq(-4165516800);
(time1.getTime() / 1000).should.be.eq(-120168835200);
// convert from julian to gregorian
// Date#getMonth() is zero-based, so + 1 passes the calendar month number
const gregString = converter.fromJulianToGregorian(time.getFullYear(), time.getMonth() + 1, time.getDate());
// get timestamp
const converted = (new Date(gregString)).getTime();
// check julian to gregorian calendar
// NOTE(review): getFullYear/getMonth/getDate read local time and '1838-1-13' is not an
// ISO string, so this comparison presumably depends on the machine's time zone - confirm
converted.should.be.eq((new Date('1838-1-13')).getTime());
done();
});
});
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment