Commit 45f40dcc authored by Tobias Steiner's avatar Tobias Steiner
Browse files

Initial commit

parents
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
# nyc test coverage
.nyc_output
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# TypeScript v1 declaration files
typings/
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
# next.js build output
.next
dist/
.idea/
# GitLab CI pipeline: builds the Docker image and pushes it to the registry.

# Docker-in-Docker daemon made available to the jobs.
services:
  - docker:dind

variables:
  # NOTE(review): $CI_IMAGE is not one of GitLab's predefined variables
  # (the predefined one is $CI_REGISTRY_IMAGE); presumably it is defined in
  # the project's CI/CD settings - confirm.
  IMAGE_TAG: $CI_REGISTRY/$CI_IMAGE:$CI_COMMIT_REF_NAME
  LATEST_TAG: $CI_REGISTRY/$CI_IMAGE:latest

cache:
  paths:
    - node_modules/

# Build the image from the repository's Dockerfile and push it under both the
# ref-name tag and "latest". Runs for the master branch only.
build:
  only:
    - master
  image: docker:latest
  services:
    - docker:dind
  stage: build
  script:
    # Feed the job token through stdin instead of -p so it does not show up
    # in the process list or trigger Docker's insecure-password warning.
    - echo "$CI_JOB_TOKEN" | docker login -u gitlab-ci-token --password-stdin $CI_REGISTRY
    - docker build -t $IMAGE_TAG .
    - docker tag $IMAGE_TAG $LATEST_TAG
    - docker push $IMAGE_TAG
    - docker push $LATEST_TAG
FROM node:10-slim

# Basic libs: git is required for the git+https dependencies in package.json;
# the compiler toolchain and python are presumably needed to build native
# npm addons during `npm install` (verify).
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
    bash \
    ca-certificates \
    g++ \
    gcc \
    git \
    make \
    python \
 && rm -rf /var/lib/apt/lists/*

# Create app directory
RUN mkdir -p /usr/local/app

# Move to the app directory
WORKDIR /usr/local/app

# Copy the app into the container
COPY package.json package-lock.json config.json tsconfig.json /usr/local/app/
COPY src /usr/local/app/src
COPY data /usr/local/app/data

# Install dependencies
RUN npm install

# Compile the TypeScript sources into dist/
RUN npm run build

# Run the app. Exec form starts node directly instead of wrapping it in
# `/bin/sh -c`, so signals sent to the container reach the node process.
CMD ["node", "dist/index.js"]
{
"kafka": {
"broker": "localhost:29092",
"schema-registry": "http://localhost:8081",
"fetchAllVersions": true
},
"producer": {
"geolinker" : {
"config": {
"batch.num.messages": 1000,
"message.send.max.retries": 5,
"retry.backoff.ms": 5000,
"message.max.bytes": 10485760
},
"topics": "geolinker",
"partition": -1,
"author": "bot#1"
},
"linker" : {
"config": {
"batch.num.messages": 1000,
"message.send.max.retries": 5,
"retry.backoff.ms": 5000,
"message.max.bytes": 10485760
},
"topics": "linker",
"partition": -1,
"author": "bot#1"
}
},
"log-dir": "/tmp",
"reporter": {
"url": "http://dashboard-api"
},
"csv": "https://www.muenzfunde.ch/downloads/thesauri/ch_municipalities.csv"
}
,tmen,pingu,29.01.2019 14:01,file:///home/tmen/.config/libreoffice/4;
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
{
"name": "fundmuenzen-producer",
"version": "1.0.0",
"description": "A tool to send links from fundmuenzen to Kafka",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"lint": "node_modules/.bin/tslint -c tslint.json 'src/**/*.ts'",
"build": "node_modules/.bin/tsc",
"watch": "concurrently -k -p \"[{name}]\" -n \"TypeScript,Node\" -c \"yellow.bold,cyan.bold,green.bold\" \"npm run watch-ts\" \"npm run watch-node\"",
"watch-node": "nodemon dist/index.js",
"watch-ts": "tsc -w"
},
"author": "",
"license": "ISC",
"dependencies": {
"@types/node": "^9.6.6",
"@types/proj4": "^2.5.0",
"concurrently": "^3.5.1",
"csv": "^3.1.0",
"csv-readable": "git+https://gitlab+deploy-token-4:Htg4YFbNZ28_dqSfrtxp@source.dodis.ch/histhub/csvreadable.git",
"geolinker-common": "git+https://gitlab+deploy-token-1:vnsdCm_t84QGVA2U4kw4@source.dodis.ch/histhub/geolinker-common.git",
"proj4": "^2.5.0",
"request": "^2.88.0",
"source-map-support": "^0.5.6",
"typescript": "^2.8.3"
},
"devDependencies": {
"tslint": "^5.9.1"
}
}
# CSV-link-producer
This small script streams a collection of links, compiled by Phillip, to the Kafka linker topic, making the data available to the linker.
## Docker
To build the image, use the following command. The image will fetch data from the dump, decompress it and stream a normalized version to Kafka. The container is based on Debian (the `node:10-slim` image).
```bash
docker build -t source.dodis.ch:4577/histhub/csv-link-producer .
# Upload to the registry
docker push source.dodis.ch:4577/histhub/csv-link-producer
```
## Deploy to k8
We execute a job on k8 to stream the dump into kafka
```bash
kubectl create -f csv-link-producer.yaml
```
import {CsvReadable, InterfaceConcordance, InterfaceGeolinker} from 'csv-readable';
import * as proj4 from 'proj4';
/**
 * CSV producer for the muenzfunde.ch ("fundmuenzen") municipality thesaurus.
 *
 * Each CSV row is turned into a geolinker record (`transformData`) and a
 * concordance of links to other providers (`transformLinks`). Reading the
 * CSV and publishing the results is handled by the `CsvReadable` base class.
 *
 * Column indexes used by this class:
 * 0 name, 1/2 coordinates (EPSG:2056), 3 fundmuenzen id, 4 hls, 5 ortsnamen,
 * 6 dodis, 7 ssrq, 9 gnd, 10 wikidata, 11 tgn, 14 country.
 */
class Fundmuenzen extends CsvReadable {
  /** Set once the EPSG definitions have been registered with proj4. */
  private static projectionsRegistered = false;

  /** True until the first row (the CSV header) has been consumed. */
  private header = true;

  /**
   * @param path Path of the configuration file, handed to `CsvReadable`.
   */
  constructor(path) {
    super(path);
  }

  /**
   * Drops the first row (the header) and passes every other row through.
   *
   * @param record Raw CSV row.
   * @param callback Node-style callback; called without data to skip a row.
   */
  public beforeTransform(record, callback) {
    if (this.header) {
      this.header = false;
      return callback(null);
    }
    return callback(null, record);
  }

  /**
   * Builds the geolinker record for one CSV row.
   *
   * @param records Column values of one CSV row.
   * @returns The record, or `null` when the header has not been consumed yet
   *          or the row lacks a usable id or coordinates.
   */
  public transformData(records: any): InterfaceGeolinker | null {
    if (this.header) {
      return null;
    }
    // we need both coordinates and the id
    const required = [records[1], records[2], records[3]];
    if (required.some((value) => typeof value === 'undefined' || value === '')) {
      return null;
    }
    // Skip rows whose coordinates are not numeric instead of handing NaN to
    // the projection.
    const x = parseFloat(records[1]);
    const y = parseFloat(records[2]);
    if (!isFinite(x) || !isFinite(y)) {
      return null;
    }
    const id = records[3].toString();
    return {
      id,
      provider: 'fundmuenzen',
      url: `https://www.muenzfunde.ch/id/country/ch/municipality/${id}/`,
      name: records[0].toString(),
      alternative_name: null,
      typology: 'municipality',
      country: records[14],
      location: this.buildLocation(x, y),
      modification_date: this.getDate(),
    };
  }

  /**
   * Builds the concordance (links to other providers) for one CSV row.
   *
   * @param records Column values of one CSV row.
   * @returns The concordance, or `null` when the row has no fundmuenzen id
   *          (same rule `transformData` applies to the id column).
   */
  public transformLinks(records: any): InterfaceConcordance | null {
    const id = records[3];
    if (typeof id === 'undefined' || id === '') {
      return null;
    }
    const from = `https://www.muenzfunde.ch/id/country/ch/municipality/${id.toString()}/`;
    const resolver = this.core.getUriBuilder();
    // transform the identifiers to links
    // NOTE(review): identifiers of other providers are passed on even when
    // the column is empty; confirm that urlResolver copes with that.
    const urls = [
      resolver.urlResolver('fundmuenzen', `${id}`),
      resolver.urlResolver('dodis', `${records[6]}`),
      resolver.urlResolver('ssrq', records[7]),
      resolver.urlResolver('hls', records[4]),
      resolver.urlResolver('ortsnamen', records[5]),
      resolver.urlResolver('gnd', records[9]),
      resolver.urlResolver('tgn', records[11]),
      resolver.urlResolver('wikidata', records[10]),
    ];
    return {
      from,
      to: urls,
    };
  }

  /**
   * Returns today's date (local time) as `year-month-day`.
   *
   * `Date.getMonth()` is zero-based, hence the `+ 1`.
   * NOTE(review): month and day are not zero-padded, as before; switch to
   * padded values if consumers expect ISO 8601 dates.
   */
  private getDate() {
    const date = new Date();
    return `${date.getFullYear()}-${date.getMonth() + 1}-${date.getDate()}`;
  }

  /**
   * Converts a coordinate pair from EPSG:2056 to EPSG:4326.
   *
   * @param x First coordinate of the EPSG:2056 pair, passed to proj4 as x.
   * @param y Second coordinate of the EPSG:2056 pair, passed to proj4 as y.
   * @returns The two converted values joined by a comma, in the order proj4
   *          returns them.
   */
  private buildLocation(x: number, y: number) {
    // The definitions never change, so register them once instead of on
    // every record.
    if (!Fundmuenzen.projectionsRegistered) {
      proj4.defs('EPSG:2056', '+proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs');
      proj4.defs('EPSG:4326', '+proj=longlat +datum=WGS84 +no_defs');
      Fundmuenzen.projectionsRegistered = true;
    }
    const coordinates = proj4('EPSG:2056', 'EPSG:4326', [x, y]);
    return `${coordinates[0]},${coordinates[1]}`;
  }
}
// Entry point: create the producer, pointing it at the config.json that sits
// one directory above this file.
const fundmuenzen = new Fundmuenzen(`${__dirname}/../config.json`);
{
"compilerOptions": {
"target": "es6",
"module": "commonjs",
"outDir": "dist",
"inlineSourceMap": true,
"declaration": true,
"moduleResolution": "node",
"typeRoots": [
"node_modules/@types"
]
},
"include": [
"src/**/*.ts"
],
"exclude": [
"node_modules"
]
}
{
"defaultSeverity": "error",
"extends": [
"tslint:recommended"
],
"jsRules": {},
"rules": {
"quotemark": [
true,
"single"
],
"object-literal-sort-keys": false
},
"rulesDirectory": []
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment