Commit a8927598 authored by tobinski's avatar tobinski

Resolve hls links to a generic format and try to get the corresponding...

Resolve hls links to a generic format and try to get the corresponding wikidata q-number from histhub
parent 3f40e63b
import axios from 'axios'
/**
 * Small HTTP client for the histhub geolinker "sameas" endpoint.
 */
class ApiQueryDispatcher {
  /** Base URL; the URI-encoded query is appended to it as the last path segment. */
  private readonly endpoint = 'https://api.geolinker.histhub.ch/v2/sameas/';

  /**
   * Send a GET request for the given query.
   * @param apiQuery value that is URI-encoded and appended to the endpoint
   * @returns promise resolving to the `data` property of the axios response
   */
  public query(apiQuery: string): Promise<any> {
    const encodedQuery = encodeURIComponent(apiQuery);
    // endpoint + encoded query + fixed depth parameter
    const requestUrl = [this.endpoint, encodedQuery, '?depth=2'].join('');
    return axios.get(requestUrl).then(({ data }) => data);
  }
}
// Single dispatcher instance, created once at module load and exposed as the default export.
const queryDispatcher = new ApiQueryDispatcher();
export default queryDispatcher
import { Transform } from 'stream';
import {InterfaceCore} from 'geolinker-common/dist/core';
import queryDispatcher from './sparql-query-dispatcher'
import sparqlQueryDispatcher from './sparql-query-dispatcher'
import apiQueryDispatcher from './api-query-dispatcher'
/**
* Prepare a value for kafka ProducerStream
*/
......@@ -24,11 +24,10 @@ class EthAnalyserTransformer extends Transform {
* @private
*/
public async _transform(chunk, encoding, callback) {
const data = chunk;
let to : string[] = [];
// there are two link fields
if(data.link !== '' && typeof data.link !== 'undefined') {
// todo: transform to q-Number
to.push(data.link);
}
if(data.link2 !== '' && typeof data.link2 !== 'undefined') {
......@@ -36,22 +35,38 @@ class EthAnalyserTransformer extends Transform {
}
if(to.length > 0) {
// transform de.wikipedia.org links to wikidata numbers
to = await Promise.all(to.map(async (u) => {
// transform wikidata
if (u.match("wikipedia")) {
return await this.toQnumber(u);
return await EthAnalyserTransformer.toQnumber(u.trim());
}
// transform hls
if (u.match("hls-dhs-dss")) {
return await EthAnalyserTransformer.hlsToQnumber(u.trim());
}
return u;
// currently ignore all other links. There may be links to other sites
return "";
}));
to = to
.map(u => u.trim())
.filter(u => u !== "");
// remove empty data
to = to.filter(u => u !== "");
// make them unique
to = [...new Set(to)];
}
if(to.length > 0) {
// report error. Something is wrong. The eth is referencing to more than one concept
if (to.length > 1) {
// in such a situation we take the wikipedia link for further processing.
// because it seems more exact. This is the first promise -> key 0
console.log(`Links to two different wikidata concepts from http://ethorama.library.ethz.ch/de/node/${data.id} to wikipedia ${to[0]} and Hls ${to[1]}`);
}
// build primo link
const qnumber = to[0].match(/(\d.+)/);
const primoLink = `https://search.library.ethz.ch/primo-explore/search?query=any,contains,%5Bwd~2Fplace%5DQ${qnumber[0]}&tab=default_tab&search_scope=default_scope&vid=DADS&lang=de_DE&offset=0`;
const result = {
from: `http://ethorama.library.ethz.ch/de/node/${data.id}`,
to,
from: primoLink,
to: [to[0]],
relation: {
type: 'same_as',
author: this.author,
......@@ -67,7 +82,7 @@ class EthAnalyserTransformer extends Transform {
* Transform a de.wikipedia.org link into a wikidata Q link
* @param link
*/
private async toQnumber(link: string) : Promise<string>{
private static async toQnumber(link: string) : Promise<string>{
const lemma = link.split("/");
const sparqlQuery = `
#Wikidata items of Wikipedia articles
......@@ -84,14 +99,37 @@ class EthAnalyserTransformer extends Transform {
schema:name ?lemma.
}`;
const res = await queryDispatcher.query( sparqlQuery ) ;
const res = await sparqlQueryDispatcher.query( sparqlQuery ) ;
if(res.results.bindings.length > 0) {
const entity = res.results.bindings[0].item.value.split("/");
// we wanna control the link
return `https://www.wikidata.org/wiki/${entity[entity.length - 1]}`;
}
return "";
}
/**
 * Turn an hls link into a wikidata link by looking it up through the histhub api.
 *
 * Links containing `de/articles` (the new hls url format) are first rewritten to
 * the old `textes/d/D<id>.php` format before the lookup; all other links are
 * passed to the api unchanged.
 *
 * @param link hls url to resolve
 * @returns the `Wikidata` value of the first returned link that carries one, or
 *          an empty string when the url has no six-digit id or the api response
 *          contains no wikidata link
 */
private static async hlsToQnumber(link: string) : Promise<string> {
  let lookupLink = link;
  // the problem is, we use different urls for hls
  // turn new urls into the old format
  if (lookupLink.match("de/articles")) {
    const idMatch = lookupLink.match(/(\d{6})/);
    if (idMatch === null) {
      // match() returns null when there is no six-digit id; without an id the
      // old-format url cannot be built, so treat the link as unresolvable
      return "";
    }
    // parseInt drops the leading zeros of the six-digit id; the radix is given
    // explicitly so the id is always read as decimal
    const id = parseInt(idMatch[0], 10);
    lookupLink = `http://www.hls-dhs-dss.ch/textes/d/D${id}.php`;
  }
  const response = await apiQueryDispatcher.query(lookupLink);
  // NOTE(review): the response is untyped; presumably the links live under
  // data.resolverneo4j.links - guard the path instead of crashing on another shape
  const candidates = response && response.data && response.data.resolverneo4j
    ? response.data.resolverneo4j.links
    : undefined;
  if (!Array.isArray(candidates)) {
    return "";
  }
  const wikidata = candidates
    // keep only entries that carry their own 'Wikidata' property; calling
    // hasOwnProperty via the prototype is safe for null-prototype objects
    .filter((l) => l !== null && typeof l === 'object'
      && Object.prototype.hasOwnProperty.call(l, 'Wikidata'))
    .map((l) => l.Wikidata);
  return wikidata.length > 0 ? wikidata[0] : "";
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment