Commit 09347710 authored by akwizgran

Merge branch 'command-line-arguments' into 'master'

Add hard-coded parameters as command line arguments

See merge request !2
parents 7ca0d8df 95c459b4
#!/usr/bin/env python3
import collections
import csv
import datetime
import json
import statistics
import pendulum
from tqdm import tqdm
import json
from analyzer import OoniAnalyser
......@@ -18,7 +17,6 @@ UNIT = 'days' # hours, days, weeks
INTERVAL = 1
BRIDGE_CONTROL_THRESHOLD = 80 # only consider bridges with a control success rate higher than this
COUNTRIES = BLOCKING_COUNTRIES + CONTROL_COUNTRIES
FAILURES = [
'generic_timeout_error',
'connection_refused_error',
......@@ -35,16 +33,44 @@ def main():
class BridgeAnalyzer(OoniAnalyser):
pre_report_init = False
period = None
blocking_countries = []
control_countries = []
countries = []
bridge_data = {}
def init_arg_parser(self):
    """Build the argument parser for the bridge analysis.

    Starts from the parser returned by the parent class and registers the
    options specific to this analyzer: nickname substitution, the control
    success-rate threshold, the bucket size and unit used for aggregating
    reports, and the two country lists (blocking and control group).

    Defaults come from the module-level constants, so running without any
    of these options uses those constant values.

    Returns:
        The parent's parser object with the additional arguments registered.
    """
    parser = super().init_arg_parser()
    register = parser.add_argument

    register('-n', '--nicknames', action='store_true', dest='nicknames',
             help='replace bridge addresses with nicknames')

    # The default is rendered into the help text up front.
    threshold_help = 'ignore bridges with a control success rate lower than this'
    threshold_help += '. default: %d' % BRIDGE_CONTROL_THRESHOLD
    register('--control-threshold', dest='control_threshold', type=float,
             metavar='PERCENT', default=BRIDGE_CONTROL_THRESHOLD,
             help=threshold_help)

    size_help = 'size of the buckets reports are aggregated in. default: %d' % INTERVAL
    register('--bucket-size', dest='bucket_size', type=int, metavar='NUM',
             default=INTERVAL, help=size_help)

    unit_help = 'unit of the buckets reports are aggregated in. default: %s' % UNIT
    register('--bucket-unit', dest='bucket_unit',
             choices=['hours', 'days', 'weeks'], default=UNIT,
             help=unit_help)

    register('-c', '--countries', nargs='+', metavar='COUNTRY_CODE',
             default=BLOCKING_COUNTRIES,
             help='list of country codes to consider for blocking')

    control_help = 'list of country codes of countries in the control group, '
    control_help += 'i.e. countries unlikely to block Tor.'
    register('--control-countries', dest='control_countries', nargs='+',
             metavar='COUNTRY_CODE', default=CONTROL_COUNTRIES,
             help=control_help)

    return parser
def analyze(self):
    """Normalize the configured country codes, then run the parent analysis.

    The blocking and control country codes taken from the parsed arguments
    are upper-cased and stored on the instance; ``self.countries`` is the
    blocking list followed by the control list.
    """
    options = self.args
    self.blocking_countries = blocking = [code.upper() for code in options.countries]
    self.control_countries = control = [code.upper() for code in options.control_countries]
    # Blocking countries first, then the control group.
    self.countries = blocking + control
    super().analyze()
def use_country(self, cc):
    """Tell whether a country code is part of this analysis.

    Membership is checked against ``self.countries`` (the configured
    blocking plus control countries) rather than the module-level
    ``COUNTRIES`` constant, so countries selected on the command line are
    honoured.

    Args:
        cc: country code to look up; compared as-is, so it must use the
            same (upper) case as the entries of ``self.countries``.

    Returns:
        True if ``cc`` is one of the configured countries, else False.
    """
    return cc in self.countries
@staticmethod
def use_report(data):
......@@ -60,7 +86,8 @@ class BridgeAnalyzer(OoniAnalyser):
def pre_parse_report(self):
# convert date of first report to clean pendulum date
timestamp = self.start_date.replace(tzinfo=datetime.timezone.utc).timestamp()
self.start_date = pendulum.from_timestamp(timestamp).start_of(UNIT.rstrip('s'))
time_unit = self.args.bucket_unit.rstrip('s')
self.start_date = pendulum.from_timestamp(timestamp).start_of(time_unit)
self.period = pendulum.period(self.start_date, pendulum.now())
# remember to not call this again
self.pre_report_init = True
......@@ -108,13 +135,12 @@ class BridgeAnalyzer(OoniAnalyser):
data['countries'][cc]['success'] += country['success']
data['countries'][cc]['failure'] += country['failure']
def get_field_names(self):
    """Return the ordered tuple of output column names.

    The columns are built from the countries configured on this instance
    (``self.blocking_countries`` / ``self.control_countries``) instead of
    the module-level constants, so the header follows the command-line
    selection.

    Layout:
        1. the fixed columns ``bridge``, ``type``, ``total``
        2. the blocking country codes, sorted
        3. ``ctrl`` followed by the control country codes, sorted
        4. the blocking country codes suffixed with ``#``, sorted
        5. the control country codes suffixed with ``#``, sorted

    Returns:
        A tuple of column-name strings.
    """
    # The "#" variants are sorted after the suffix is appended.
    blocking_total = sorted(cc + "#" for cc in self.blocking_countries)
    control_total = sorted(cc + "#" for cc in self.control_countries)
    return (
        'bridge', 'type', 'total',
        *sorted(self.blocking_countries),
        'ctrl', *sorted(self.control_countries),
        *blocking_total,
        *control_total,
    )
......@@ -145,7 +171,7 @@ class BridgeAnalyzer(OoniAnalyser):
output_dict[bridge][cc + "#"] = total
# calculate median success rate of control group
rates = []
for cc in CONTROL_COUNTRIES:
for cc in self.control_countries:
if cc in output_dict[bridge]:
rates.append(output_dict[bridge][cc])
if len(rates) > 0:
......@@ -154,8 +180,8 @@ class BridgeAnalyzer(OoniAnalyser):
del output_dict[bridge]
continue
# remember success rates for blocking countries
if output_dict[bridge]['ctrl'] >= BRIDGE_CONTROL_THRESHOLD:
for cc in BLOCKING_COUNTRIES:
if output_dict[bridge]['ctrl'] >= self.args.control_threshold:
for cc in self.blocking_countries:
bridge_type = output_dict[bridge]["type"]
if bridge_type == "B" or bridge_type == "TB":
if cc not in output_dict[bridge]:
......@@ -190,7 +216,7 @@ class BridgeAnalyzer(OoniAnalyser):
'countries': {},
}
# initialize time buckets
for dt in self.period.range(UNIT, amount=INTERVAL):
for dt in self.period.range(self.args.bucket_unit, amount=self.args.bucket_size):
bucket = {
'dt': str(dt),
'success': False,
......@@ -208,15 +234,15 @@ class BridgeAnalyzer(OoniAnalyser):
def get_bucket(self, bridge, dt):
    """Return the time bucket of ``bridge`` that the moment ``dt`` falls into.

    The elapsed time between ``self.start_date`` and ``dt`` is measured in
    the configured bucket unit (``self.args.bucket_unit``) and divided by
    the configured bucket size (``self.args.bucket_size``); the truncated
    quotient is the index into ``self.bridge_data[bridge]['buckets']``.

    Args:
        bridge: key of the bridge in ``self.bridge_data``.
        dt: moment to locate; ``dt - self.start_date`` must yield an object
            offering ``total_weeks()``, ``total_days()`` and
            ``total_hours()`` (presumably a pendulum datetime - TODO confirm
            against the caller).

    Returns:
        The bucket stored at the computed position.

    Raises:
        RuntimeError: if the configured bucket unit is not one of
            ``weeks``, ``days`` or ``hours``.
        IndexError: if the computed position lies beyond the initialized
            buckets.
    """
    unit = self.args.bucket_unit
    duration = dt - self.start_date
    if unit == 'weeks':
        diff = duration.total_weeks()
    elif unit == 'days':
        diff = duration.total_days()
    elif unit == 'hours':
        diff = duration.total_hours()
    else:
        raise RuntimeError("Unknown time unit: %s" % unit)
    # int() truncates, so a partially elapsed bucket maps to its start.
    pos = int(diff / self.args.bucket_size)
    return self.bridge_data[bridge]['buckets'][pos]
def get_nicknames_and_types(self):
......
$("#bridge_reachability").tabulator({
layout:"fitData",
// TODO get this from JSON instead of hardcoding it
columns:[
{title:"Node", field:"bridge"},
{title:"Type", field:"type"},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment