Verified Commit 16a8af29 authored by Torsten Grote's avatar Torsten Grote
Browse files

Print a list of countries that might block bridges

This takes a configurable threshold for the median success rate of all
bridges that have at least an 80% success rate in the control group.
parent 97b15826
.idea
__pycache__
bridge-timeline.csv
bridge-timeline-by-dest.pdf
bridge-timeline-by-site.pdf
bridge-timeline-plot-*.png
countries.json
countries-bridges.csv
\ No newline at end of file
/bridge-timeline.csv
/bridge-timeline-by-dest.pdf
/bridge-timeline-by-site.pdf
/bridge-timeline-plot-*.png
/countries.json
/countries-bridges.json
......@@ -7,6 +7,7 @@ import statistics
import pendulum
from tqdm import tqdm
import json
from ooni_analyzer import OoniAnalyser
......@@ -15,6 +16,7 @@ CONTROL_COUNTRIES = ['US', 'GB', 'DE', 'IT']
UNIT = 'days' # hours, days, weeks
INTERVAL = 1
BRIDGE_CONTROL_THRESHOLD = 80 # only consider bridges with a control success rate higher than this
COUNTRIES = BLOCKING_COUNTRIES + CONTROL_COUNTRIES
FAILURES = [
......@@ -119,48 +121,67 @@ class BridgeAnalyzer(OoniAnalyser):
def process_data(self):
self.count_totals_for_successful_buckets()
nicknames, types = self.get_nicknames_and_types()
# re-order data for CSV export
csv_dict = {}
# re-order data for JSON export
output_dict = {}
success_rates = {}
for bridge, data in tqdm(self.bridge_data.items(), desc="Calculating Success Rate"):
# add bridge name
if bridge not in csv_dict:
if bridge not in output_dict:
name = nicknames.get(bridge, bridge)
csv_dict[bridge] = {'bridge': name}
csv_dict[bridge]["type"] = types.get(bridge, None)
output_dict[bridge] = {'bridge': name}
output_dict[bridge]["type"] = types.get(bridge, None)
# get total and ignore bridges with too few measurements
if data['total'] is None or data['total'] < 500:
del csv_dict[bridge]
del output_dict[bridge]
continue
csv_dict[bridge]['total'] = data['total']
output_dict[bridge]['total'] = data['total']
# go through all buckets
for cc, country in data['countries'].items():
# calculate success rate
total = country['success'] + country['failure']
success_rate = round(country['success'] / total * 100, 2)
csv_dict[bridge][cc] = success_rate
output_dict[bridge][cc] = success_rate
# add total count
csv_dict[bridge][cc + "#"] = total
output_dict[bridge][cc + "#"] = total
# calculate median success rate of control group
rates = []
for cc in CONTROL_COUNTRIES:
if cc in csv_dict[bridge]:
rates.append(csv_dict[bridge][cc])
if cc in output_dict[bridge]:
rates.append(output_dict[bridge][cc])
if len(rates) > 0:
csv_dict[bridge]['ctrl'] = statistics.median(rates)
output_dict[bridge]['ctrl'] = round(statistics.median(rates), 2)
else:
del csv_dict[bridge]
# sort CSV by control group success rate
csv_dict = collections.OrderedDict(
sorted(csv_dict.items(), key=lambda x: x[1]['ctrl'], reverse=True)
)
# write CSV file
with open('countries-bridges.csv', 'w', newline='') as f:
writer = csv.DictWriter(f, fieldnames=self.get_field_names())
writer.writeheader()
for row in csv_dict.values():
writer.writerow(row)
del output_dict[bridge]
continue
# remember success rates for blocking countries
if output_dict[bridge]['ctrl'] >= BRIDGE_CONTROL_THRESHOLD:
for cc in BLOCKING_COUNTRIES:
bridge_type = output_dict[bridge]["type"]
if bridge_type == "B" or bridge_type == "TB":
if cc not in output_dict[bridge]:
continue
success_rate = output_dict[bridge][cc]
if cc not in success_rates:
success_rates[cc] = []
success_rates[cc].append(success_rate)
blocking_countries = []
for cc, rates in success_rates.items():
median = round(statistics.median(rates), 2)
print("%s: %f" % (cc, median))
if median < self.args.threshold:
blocking_countries.append(cc)
# print blocking countries
blocking_countries = sorted(blocking_countries)
print()
print("Blocking Countries: %s" % str(blocking_countries))
print()
# write JSON file
if self.args.json:
with open('countries-bridges.json', 'w') as f:
json.dump(list(output_dict.values()), f)
def get_new_bridge(self):
bridge = {
......
......@@ -17,16 +17,6 @@ def main():
class TorAnalyzer(OoniAnalyser):
country_data = collections.OrderedDict()
def init_arg_parser(self):
parser = super().init_arg_parser()
parser.add_argument('-t', '--success-threshold', dest='threshold', type=float, default=50,
metavar='PERCENT',
help='consider countries as blocking with a success rate less than this'
+ '. default: 50')
parser.add_argument('-j', '--json', dest='json', action='store_true',
help='create a \'countries.json\' file with the results')
return parser
def parse_report(self, data):
if data['test_keys']['success'] is None:
return
......@@ -53,7 +43,7 @@ class TorAnalyzer(OoniAnalyser):
# print blocking countries
blocking_countries = sorted(blocking_countries)
print()
print("Blocked Countries: %s" % str(blocking_countries))
print("Blocking Countries: %s" % str(blocking_countries))
print()
for country in blocking_countries:
print("%s: %s" % (country, str(self.country_data[country])))
......
......@@ -26,6 +26,12 @@ class OoniAnalyser:
help='directory with OONI reports as supplied by ooni-sync')
parser.add_argument('--since', dest='since', type=date, metavar='YYYY-MM-DD',
help='only consider reports after that date, e.g. 2017-12-24')
parser.add_argument('-t', '--success-threshold', dest='threshold', type=float, default=50,
metavar='PERCENT',
help='consider countries as blocking with a success rate less than this'
+ '. default: 50')
parser.add_argument('-j', '--json', dest='json', action='store_true',
help='create a JSON file with the results')
return parser
def use_country(self, cc):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment