Skip to content

Commit

Permalink
Merge pull request #7 from ntkog/feature/improve_regex
Browse files Browse the repository at this point in the history
Feature/improve regex
  • Loading branch information
ntkog authored May 25, 2019
2 parents 198623e + e9b04b3 commit 1b4288a
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 19 deletions.
11 changes: 8 additions & 3 deletions config/elections.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,25 @@
"PP",
"pablocasado_",
"Casado",
"ppopular"
"ppopular",
"populares"
],
"psoe": [
"PSOE",
"sanchezcastejon",
"Sanchez"
"Sanchez",
"socialistas"
],
"podemos": [
"Podemos",
"pablo_iglesias_",
"ahorapodemos"
"ahorapodemos",
"unidaspodemos",
"unidas podemos"
],
"ciudadanos": [
"Ciudadanos",
"Cs",
"Albert_Rivera",
"Rivera",
"ciudadanoscs"
Expand Down
28 changes: 18 additions & 10 deletions lib/elections_stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ const colors = require('colors');
const fs = require('fs');
const twitterStream = require('twitter-stream-api');
const es = require('event-stream');
const {mapping_parties} = require('./helpers/elections.js');
const {createObjectCsvWriter} = require('csv-writer');

var geocoder;
var geocoder,mapping_parties;
// **** BEGIN INTERNAL API ****
/**
 * Stores the given geocoder in the module-level `geocoder` variable so the
 * other helpers in this file (e.g. `_addCcaaTweet`) can use it.
 *
 * @param {object} geo - Geocoder instance; `_addCcaaTweet` calls `findCCAA` on it.
 */
function _setGeocoder(geo) {
geocoder = geo;
}
/**
 * Builds the tweet classifier for the given tracking words and stores it in
 * the module-level `mapping_parties` variable.
 *
 * @param {string} str - Tracking words, forwarded unchanged to the factory
 *   exported by `./helpers/elections.js`.
 */
function _setMappingWords(str) {
  const buildMapper = require('./helpers/elections.js');
  mapping_parties = buildMapper(str);
}

function _isoDate(dateStr) {
let date = new Date(dateStr);
Expand Down Expand Up @@ -52,7 +54,7 @@ function _virtualLocTweet (t, coords) {

/**
 * Returns a copy of the tweet with the geocoder's CCAA lookup result attached.
 *
 * @param {object} t - Tweet object; passed as-is to `geocoder.findCCAA`.
 * @returns {object} Shallow copy of `t` with an extra `ccaa` property holding
 *   a shallow copy of whatever `findCCAA` returned.
 */
function _addCcaaTweet(t) {
  const ccaa = geocoder.findCCAA(t);
  return {
    ...t,
    ccaa: { ...ccaa },
  };
}
Expand Down Expand Up @@ -134,17 +136,21 @@ function mapTweet(tweet, callback) {
/**
 * Predicate for `es.filterSync`: keeps chunks that parse as JSON and do not
 * carry an own `error` property.
 *
 * @param {string|Buffer} o - Serialized tweet as it flows through the pipeline.
 * @returns {boolean} `true` to keep the chunk; `false` when the parsed value
 *   has an `error` property or the chunk cannot be parsed (the failure is
 *   logged).
 */
function filterEmptyTweets(o) {
  try {
    const d = JSON.parse(Buffer.from(o).toString('utf8'));
    // Go through the prototype so a payload that happens to contain its own
    // `hasOwnProperty` key cannot shadow the check.
    return !Object.prototype.hasOwnProperty.call(d, "error");
  } catch (err) {
    console.log(`Error on [filterEmptyTweets] : ${err}`);
    // Explicit rejection instead of an implicit `undefined`.
    return false;
  }
}

/**
 * Predicate for `es.filterSync`: keeps only chunks whose parsed JSON has a
 * non-empty `markWords` list.
 *
 * @param {string|Buffer} o - Serialized tweet as it flows through the pipeline.
 * @returns {boolean} `true` when `markWords` has at least one entry; `false`
 *   when it is empty or missing, or when the chunk cannot be parsed (the
 *   parse failure is logged).
 */
function filterUnMarked(o) {
  try {
    const d = JSON.parse(Buffer.from(o).toString('utf8'));
    const marks = d.markWords;
    // A payload without `markWords` is simply unmarked; answer `false`
    // directly instead of throwing and logging once per such tweet.
    return Boolean(marks) && marks.length > 0;
  } catch (err) {
    console.log(`Error on [unMarked] : ${err}`);
    // Explicit rejection instead of an implicit `undefined`.
    return false;
  }
}

function classifyTweets(t, callback){
console.log("entra");
try {
Expand All @@ -162,11 +168,13 @@ function setup(conf) {
let twStream = require('./twitter_stream.js')(conf.twitter);
let geocoder = require('./batch_geocoder.js')(conf.geocoders);
_setGeocoder(geocoder);
_setMappingWords(conf.twitter.track);
let pipeline = twStream
.pipe(es.filterSync(isGeoTweet))
.pipe(es.map(mapTweet))
.pipe(es.filterSync(filterEmptyTweets))
.pipe(es.map(classifyTweets));
.pipe(es.map(classifyTweets))
.pipe(es.filterSync(filterUnMarked));

if (conf.hasOwnProperty("ws")) {
let wsStream = require('./helpers/ws_client_stream.js')(conf.ws);
Expand Down
43 changes: 37 additions & 6 deletions lib/helpers/elections.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,45 @@
const PARTIDOS = require('../../config/elections.json').words;
const WORDS = require('../../config/elections.json').words;
const WORD_SEPARATOR = ",";
const PARTIES = Object.keys(WORDS);

// Per-party matcher: party key -> case-insensitive, whole-word RegExp built
// from that party's configured words.
// Deliberately compiled WITHOUT the "g" flag: these regexes are only used
// with `.test()`, and a global regex remembers `lastIndex` between calls, so
// testing consecutive tweets would resume mid-string and miss matches.
let rePartiesMap = PARTIES.reduce((byParty, party) => {
  const alternatives = WORDS[party].map(word => word.toLowerCase()).join("|");
  byParty[party] = new RegExp(`\\b(${alternatives})\\b`, "i");
  return byParty;
}, {});

// Flat list of every configured word of every party, in config order.
let PARTIES_EXTENDED = [];
for (const party of PARTIES) {
  PARTIES_EXTENDED.push(...WORDS[party]);
}
console.log(PARTIES_EXTENDED);

// Matches any configured party word as a whole word, case-insensitively.
// - Built from the words themselves: `Object.keys()` on an array returns its
//   indices ("0", "1", ...), which made the pattern match numbers instead of
//   party words.
// - No "g" flag: the regex is used with `.test()`, and a global regex would
//   carry `lastIndex` over from one call to the next.
const reParties = new RegExp(`\\b(${PARTIES_EXTENDED.join("|")})\\b`, "i");
// Tracking-word matcher; assigned by _setRegexWords().
var reTrackingWords;

/**
 * Classifies a tweet: which parties its text mentions and which tracking
 * words it contains.
 *
 * @param {{text: string}} t - Tweet; only `t.text` is read.
 * @returns {object} One boolean per party key (true when the text matches
 *   that party's regex in `rePartiesMap`) plus `markWords`, the distinct
 *   substrings of the text matched by `reTrackingWords` (empty array if none).
 */
function mapping_parties (t) {
  // match() yields null when nothing matches; new Set(null) is an empty set.
  const markWords = new Set(t.text.match(reTrackingWords));
  const obj = PARTIES.reduce((flags, party) => {
    const re = rePartiesMap[party];
    // .test() on a global regex resumes from the lastIndex left by the
    // previous call; rewind so every tweet is scanned from its start.
    re.lastIndex = 0;
    flags[party] = re.test(t.text);
    return flags;
  }, {});
  obj.markWords = [...markWords];
  return obj;
}

module.exports = {
mapping_parties : mapping_parties
/**
 * Compiles the module-level `reTrackingWords` regex from a list of tracking
 * words, leaving out every word that `reParties` already matches.
 *
 * @param {string} wordsStr - Tracking words joined by WORD_SEPARATOR.
 *   Surrounding whitespace and empty entries are ignored; a nullish value is
 *   treated as an empty list.
 */
function _setRegexWords (wordsStr) {
  // Tracking words are literal text, so neutralize regex metacharacters.
  const escapeRe = word => word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const trackingWords = String(wordsStr == null ? "" : wordsStr)
    .split(WORD_SEPARATOR)
    .map(word => word.trim().toLowerCase())
    .filter(word => {
      if (word === "") return false;
      // Rewind in case reParties is global: .test() would otherwise resume
      // from where the previous word's match ended.
      reParties.lastIndex = 0;
      return !reParties.test(word);
    });
  // With no words left, `\b()\b` would match the empty string at every word
  // boundary and mark every tweet; use a pattern that never matches instead.
  reTrackingWords = trackingWords.length > 0
    ? new RegExp(`\\b(${trackingWords.map(escapeRe).join("|")})\\b`, "ig")
    : /(?!)/g;
}

/**
 * Module factory: compiles the tracking-word regex from `words2track` and
 * hands back the `mapping_parties` classifier.
 *
 * @param {string} words2track - Tracking words, passed to `_setRegexWords`.
 * @returns {function} The `mapping_parties` function.
 */
module.exports = function(words2track) {
_setRegexWords(words2track);
return mapping_parties;
}

0 comments on commit 1b4288a

Please sign in to comment.