Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
briar
tor-circumvention-analytics
Commits
03d47b1b
Verified
Commit
03d47b1b
authored
Jul 03, 2018
by
Torsten Grote
Browse files
Refactor shared code into super class
parent
432201d3
Changes
3
Hide whitespace changes
Inline
Side-by-side
countries-that-block-bridges.py
View file @
03d47b1b
#!/usr/bin/env python3
import
argparse
import
collections
import
csv
import
json
import
lzma
import
os
import
re
import
sys
from
datetime
import
datetime
# Per-bridge measurement tallies, keyed by each report's 'input' field
# (the bridge under test); values are created by get_new_bridge().
BRIDGES = {}
from
ooni_analyzer
import
OoniAnalyser
# ISO alpha-2 codes of the countries whose measurements are analyzed.
# The rendered diff showed 'US', 'GB', 'DE', 'IT' twice; duplicates are
# redundant for the `cc not in COUNTRIES` membership tests and are removed.
COUNTRIES = [
    'BY', 'CN', 'EG', 'TR', 'IR',  # Tor assumed to be blocked
    'US', 'GB', 'DE', 'IT', 'RU',  # Tor should not be blocked (control group)
    # NOTE(review): 'RU' appeared in only one revision of the diff —
    # confirm whether it belongs in the control group.
]
FAILURES
=
[
'generic_timeout_error'
,
...
...
@@ -22,125 +16,97 @@ FAILURES = [
'tcp_timed_out_error'
,
'dns_lookup_error'
]
# Every recognized connection outcome: all failure kinds plus 'success'.
RESULTS = [*FAILURES, 'success']
# Matches the leading UTC timestamp of ooni-sync report file names,
# e.g. "20180703T120000Z-...". Raw string so `\d` is not treated as an
# (invalid) string escape sequence.
DATE_REGEX = re.compile(r"^(\d{8}T\d{6}Z)-")
def main():
    """Scan OONI tor_bridge_reachability reports and write a CSV of
    per-country bridge connection success rates to countries-bridges.csv."""
    parser = argparse.ArgumentParser(
        description='Get countries where Tor bridges might be blocked.')
    parser.add_argument('path', metavar='path', type=str,
                        help='directory with OONI reports as supplied by ooni-sync')
    parser.add_argument('--since', dest='since', type=date, metavar='YY-MM-DD',
                        help='only consider reports after that date, e.g. 2017-12-24')
    args = parser.parse_args()
    if not os.path.isdir(args.path):
        fail("Could not find directory '%s'" % args.path)
    # file_name (not `file`) to avoid shadowing the builtin
    for file_name in sorted(os.listdir(args.path)):
        if not file_name.endswith('.json.xz'):
            continue
        # only consider reports since the supplied date
        if args.since:
            match = DATE_REGEX.match(file_name)
            if match:
                file_date = datetime.strptime(match.group(1), '%Y%m%dT%H%M%SZ')
                if file_date < args.since:
                    continue
        # open compressed report file
        with lzma.open(os.path.join(args.path, file_name), 'r') as f:
            # iterate lazily instead of readlines() so the whole
            # decompressed report is not loaded into memory at once
            for line in f:
                data = json.loads(line)
                # filter out measurements with missing information
                if 'test_class' not in data['annotations']:
                    continue
                if data['annotations']['test_class'] != 'tor_bridge_reachability':
                    continue
                if 'connection' not in data['test_keys']:
                    continue
                # initialize bridge data if needed
                bridge = data['input']
                if bridge not in BRIDGES:
                    BRIDGES[bridge] = get_new_bridge()
                BRIDGES[bridge]['total'] += 1
                # add results per country
                cc = data['probe_cc']
                if cc not in COUNTRIES:
                    continue
                if cc not in BRIDGES[bridge]:
                    BRIDGES[bridge][cc] = get_new_country()
                result = data['test_keys']['connection']
                if result == 'success' or result in FAILURES:
                    BRIDGES[bridge][cc][result] += 1
                else:
                    fail("Unknown connection result: %s" % result)
    # re-order data for CSV export
    csv_dict = collections.OrderedDict()
    csv_field_names = set()
    csv_field_names.add('@bridge')
    csv_field_names.add('@total')
    for bridge, data in BRIDGES.items():
        for key, value in sorted(data.items()):
            # add bridge name
            if bridge not in csv_dict:
                csv_dict[bridge] = {'@bridge': bridge}
            # get total and ignore too few measurements
            if key == 'total':
                if value is None or value < 500:
                    del csv_dict[bridge]
                    break
                csv_dict[bridge]['@total'] = value
                continue  # not a country
            # calculate success rate
            country = value
            for result, num in country.items():
                if result != 'total' and result != 'failure':
                    country['total'] += num
                    if result in FAILURES:
                        country['failure'] += num
            field_name = key
            csv_field_names.add(field_name)
            success_rate = round(country['success'] / country['total'] * 100, 2)
            csv_dict[bridge][field_name] = success_rate
            field_name = "#" + key
            csv_field_names.add(field_name)
            csv_dict[bridge][field_name] = country['total']
    # write CSV file
    with open('countries-bridges.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=sorted(csv_field_names))
        writer.writeheader()
        for row in csv_dict.values():
            writer.writerow(row)
def date(date_str):
    """Parse a YYYY-MM-DD command-line string into a datetime object."""
    fmt = "%Y-%m-%d"
    return datetime.strptime(date_str, fmt)
def get_new_bridge():
    """Create an empty per-bridge tally."""
    bridge = {'total': 0}
    return bridge
def get_new_country():
    """Create a per-country tally with one zeroed counter per outcome."""
    counters = {'total': 0, 'success': 0, 'failure': 0}
    counters.update(dict.fromkeys(FAILURES, 0))
    return counters
def fail(msg=""):
    """Print an error message to stderr and terminate with exit code 1."""
    print("Error: %s" % msg, file=sys.stderr)
    sys.exit(1)
analyzer
=
BridgeAnalyzer
(
'Get countries where Tor bridges might be blocked.'
)
analyzer
.
analyze
()
class BridgeAnalyzer(OoniAnalyser):
    """Analyzes OONI tor_bridge_reachability reports and writes per-country
    bridge connection success rates to countries-bridges.csv."""

    def __init__(self, description):
        super().__init__(description)
        # Instance attribute (the original used a class-level mutable dict,
        # which would be shared — and corrupted — across instances).
        # Keyed by each report's 'input' field (the bridge under test).
        self.bridge_data = {}

    def parse_report(self, data):
        """Tally one decoded OONI report into self.bridge_data."""
        # filter out measurements with missing information
        if 'test_class' not in data['annotations']:
            return
        if data['annotations']['test_class'] != 'tor_bridge_reachability':
            return
        if 'connection' not in data['test_keys']:
            return
        # initialize bridge data if needed
        bridge = data['input']
        if bridge not in self.bridge_data:
            self.bridge_data[bridge] = self.get_new_bridge()
        self.bridge_data[bridge]['total'] += 1
        # add results per country
        cc = data['probe_cc']
        if cc not in COUNTRIES:
            return
        if cc not in self.bridge_data[bridge]:
            self.bridge_data[bridge][cc] = self.get_new_country()
        result = data['test_keys']['connection']
        if result == 'success' or result in FAILURES:
            self.bridge_data[bridge][cc][result] += 1
        else:
            self.fail("Unknown connection result: %s" % result)

    def process_data(self):
        """Re-order the collected tallies and write countries-bridges.csv."""
        # re-order data for CSV export
        csv_dict = collections.OrderedDict()
        csv_field_names = set()
        csv_field_names.add('@bridge')
        csv_field_names.add('@total')
        for bridge, data in self.bridge_data.items():
            for key, value in sorted(data.items()):
                # add bridge name
                if bridge not in csv_dict:
                    csv_dict[bridge] = {'@bridge': bridge}
                # get total and ignore too few measurements
                if key == 'total':
                    if value is None or value < 500:
                        del csv_dict[bridge]
                        break
                    csv_dict[bridge]['@total'] = value
                    continue  # not a country
                # calculate success rate
                country = value
                for result, num in country.items():
                    if result != 'total' and result != 'failure':
                        country['total'] += num
                        if result in FAILURES:
                            country['failure'] += num
                field_name = key
                csv_field_names.add(field_name)
                success_rate = round(country['success'] / country['total'] * 100, 2)
                csv_dict[bridge][field_name] = success_rate
                field_name = "#" + key
                csv_field_names.add(field_name)
                csv_dict[bridge][field_name] = country['total']
        # write CSV file
        with open('countries-bridges.csv', 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=sorted(csv_field_names))
            writer.writeheader()
            for row in csv_dict.values():
                writer.writerow(row)

    @staticmethod
    def get_new_bridge():
        """Return a fresh per-bridge tally dict."""
        return {'total': 0}

    @staticmethod
    def get_new_country():
        """Return a per-country tally with one zeroed counter per outcome."""
        country = {'total': 0, 'success': 0, 'failure': 0}
        for failure in FAILURES:
            country[failure] = 0
        return country
if
__name__
==
"__main__"
:
...
...
countries-that-block-tor.py
View file @
03d47b1b
#!/usr/bin/env python3
import
argparse
import
collections
import
json
import
lzma
import
os
import
re
import
sys
from
datetime
import
datetime
import
emojiflags.lookup
import
pycountry
# Per-country success/failure tallies, keyed by the report's 'probe_cc'
# country code; values are created by get_new_country().
COUNTRY_DATA = collections.OrderedDict()
# Matches the leading UTC timestamp of ooni-sync report file names,
# e.g. "20180703T120000Z-...". Raw string so `\d` is not treated as an
# (invalid) string escape sequence.
DATE_REGEX = re.compile(r"^(\d{8}T\d{6}Z)-")
from
ooni_analyzer
import
OoniAnalyser
def main():
    """Scan OONI reports, print per-country Tor success rates, and list
    countries whose success rate falls below the configured threshold."""
    parser = argparse.ArgumentParser(
        description='Get countries where Tor might be blocked.')
    parser.add_argument('path', metavar='path', type=str,
                        help='directory with OONI reports as supplied by ooni-sync')
    parser.add_argument('--since', dest='since', type=date, metavar='YY-MM-DD',
                        help='only consider reports after that date, e.g. 2017-12-24')
    parser.add_argument('-t', '--success-threshold', dest='threshold',
                        type=float, default=50, metavar='PERCENT',
                        # '. ' separator was missing, producing "...than thisdefault: 50"
                        help='consider countries as blocking with a success rate less than this'
                             + '. default: 50')
    parser.add_argument('-j', '--json', dest='json', action='store_true',
                        help='create a \'countries.json\' file with the results')
    args = parser.parse_args()
    if not os.path.isdir(args.path):
        fail("Could not find directory '%s'" % args.path)
    # get success and failure counts per country from report data
    # file_name (not `file`) to avoid shadowing the builtin
    for file_name in sorted(os.listdir(args.path)):
        if not file_name.endswith('.json.xz'):
            continue
        # only consider reports since the supplied date
        if args.since:
            match = DATE_REGEX.match(file_name)
            if match:
                file_date = datetime.strptime(match.group(1), '%Y%m%dT%H%M%SZ')
                if file_date < args.since:
                    continue
        # open compressed report file
        with lzma.open(os.path.join(args.path, file_name), 'r') as f:
            # iterate lazily instead of readlines() so the whole
            # decompressed report is not loaded into memory at once
            for line in f:
                data = json.loads(line)
                if data['test_keys']['success'] is None:
                    continue
                cc = data['probe_cc']
                if cc not in COUNTRY_DATA:
                    COUNTRY_DATA[cc] = get_new_country(cc)
                COUNTRY_DATA[cc]['total_count'] += 1
                if data["test_keys"]["success"]:
                    COUNTRY_DATA[cc]['success_count'] += 1
                else:
                    COUNTRY_DATA[cc]['failure_count'] += 1
    # calculate success percentage and identify blocking countries
    blocking_countries = []
    for country, data in sorted(COUNTRY_DATA.items()):
        data['success_percent'] = round(
            data['success_count'] / data['total_count'] * 100, 2)
        # only report countries with more than 5 measurements
        if data['total_count'] > 5:
            if data['success_percent'] < args.threshold:
                blocking_countries.append(country)
            print("%s: %d reports, %.2f success rate"
                  % (country, data['total_count'], data['success_percent']))
    # print blocking countries
    blocking_countries = sorted(blocking_countries)
    print()
    print("Blocked Countries: %s" % str(blocking_countries))
    print()
    for country in blocking_countries:
        print("%s: %s" % (country, str(COUNTRY_DATA[country])))
    # write a JSON file with all data to be published on the web
    if args.json:
        with open('countries.json', 'w') as f:
            f.write(json.dumps(list(COUNTRY_DATA.values()), indent=4))
def date(date_str):
    # Parse a command-line YYYY-MM-DD string into a datetime object.
    parsed = datetime.strptime(date_str, "%Y-%m-%d")
    return parsed
def get_new_country(cc):
    """Build a fresh stats record for country code cc, with a human-readable
    label (flag emoji + country name) when the code can be resolved."""
    try:
        name = pycountry.countries.get(alpha_2=cc).name
        label = "%s %s" % (emojiflags.lookup.lookup(cc), name)
    except KeyError:
        # unknown code: fall back to the bare country code
        label = cc
    record = {'country': label}
    record.update(
        total_count=0,
        success_count=0,
        failure_count=0,
        success_percent=None,
    )
    return record
def fail(msg=""):
    """Report an error on stderr and abort with exit status 1."""
    message = "Error: %s\n" % msg
    sys.stderr.write(message)
    sys.exit(1)
analyzer
=
TorAnalyzer
(
'Get countries where Tor might be blocked.'
)
analyzer
.
analyze
()
class TorAnalyzer(OoniAnalyser):
    """Analyzes OONI reports of Tor reachability, prints per-country success
    rates, and optionally writes the collected data to countries.json."""

    def __init__(self, description):
        super().__init__(description)
        # Instance attribute (the original used a class-level mutable
        # OrderedDict, which would be shared across instances).
        # Keyed by the report's 'probe_cc' country code.
        self.country_data = collections.OrderedDict()

    def init_arg_parser(self):
        """Extend the base parser with threshold and JSON-output options."""
        parser = super().init_arg_parser()
        parser.add_argument('-t', '--success-threshold', dest='threshold',
                            type=float, default=50, metavar='PERCENT',
                            help='consider countries as blocking with a success rate less than this'
                                 + '. default: 50')
        parser.add_argument('-j', '--json', dest='json', action='store_true',
                            help='create a \'countries.json\' file with the results')
        return parser

    def parse_report(self, data):
        """Tally one decoded OONI report into self.country_data."""
        if data['test_keys']['success'] is None:
            return
        cc = data['probe_cc']
        if cc not in self.country_data:
            self.country_data[cc] = self.get_new_country(cc)
        self.country_data[cc]['total_count'] += 1
        if data["test_keys"]["success"]:
            self.country_data[cc]['success_count'] += 1
        else:
            self.country_data[cc]['failure_count'] += 1

    def process_data(self):
        """Compute success percentages, print results, optionally dump JSON."""
        # calculate success percentage and identify blocking countries
        blocking_countries = []
        for country, data in sorted(self.country_data.items()):
            data['success_percent'] = round(
                data['success_count'] / data['total_count'] * 100, 2)
            # only report countries with more than 5 measurements
            if data['total_count'] > 5:
                if data['success_percent'] < self.args.threshold:
                    blocking_countries.append(country)
                print("%s: %d reports, %.2f success rate"
                      % (country, data['total_count'], data['success_percent']))
        # print blocking countries
        blocking_countries = sorted(blocking_countries)
        print()
        print("Blocked Countries: %s" % str(blocking_countries))
        print()
        for country in blocking_countries:
            print("%s: %s" % (country, str(self.country_data[country])))
        # write a JSON file with all data to be published on the web
        if self.args.json:
            with open('countries.json', 'w') as f:
                f.write(json.dumps(list(self.country_data.values()), indent=4))

    @staticmethod
    def get_new_country(cc):
        """Return a fresh stats record for cc, with a human-readable label
        (flag emoji + country name) when the code can be resolved."""
        try:
            country = pycountry.countries.get(alpha_2=cc).name
            flag = emojiflags.lookup.lookup(cc)
            country = "%s %s" % (flag, country)
        except KeyError:
            # unknown code: fall back to the bare country code
            country = cc
        return {
            'country': country,
            'total_count': 0,
            'success_count': 0,
            'failure_count': 0,
            'success_percent': None,
        }
if
__name__
==
"__main__"
:
...
...
ooni_analyzer.py
0 → 100644
View file @
03d47b1b
import
argparse
import
json
import
lzma
import
os
import
re
import
sys
from
datetime
import
datetime
# Matches the leading UTC timestamp of ooni-sync report file names,
# e.g. "20180703T120000Z-...". Raw string so `\d` is not treated as an
# (invalid) string escape sequence.
DATE_REGEX = re.compile(r"^(\d{8}T\d{6}Z)-")
class OoniAnalyser:
    """Base class for OONI report analyzers.

    Parses command-line arguments on construction, then analyze() feeds each
    JSON line of every .json.xz report in the given directory to
    parse_report() and finally calls process_data(). Subclasses must
    implement parse_report() and process_data(), and may extend
    init_arg_parser().
    """

    def __init__(self, description):
        # description is shown in the argument parser's --help output
        self.description = description
        parser = self.init_arg_parser()
        self.args = parser.parse_args()

    def init_arg_parser(self):
        """Create the base argument parser; subclasses extend and return it."""
        parser = argparse.ArgumentParser(description=self.description)
        parser.add_argument('path', metavar='path', type=str,
                            help='directory with OONI reports as supplied by ooni-sync')
        parser.add_argument('--since', dest='since', type=date, metavar='YY-MM-DD',
                            help='only consider reports after that date, e.g. 2017-12-24')
        return parser

    def analyze(self):
        """Feed every report line in args.path to parse_report(), then run
        process_data()."""
        if not os.path.isdir(self.args.path):
            self.fail("Could not find directory '%s'" % self.args.path)
        # file_name (not `file`) to avoid shadowing the builtin
        for file_name in sorted(os.listdir(self.args.path)):
            if not file_name.endswith('.json.xz'):
                continue
            # only consider reports since the supplied date
            if self.args.since:
                match = DATE_REGEX.match(file_name)
                if match:
                    file_date = datetime.strptime(match.group(1), '%Y%m%dT%H%M%SZ')
                    if file_date < self.args.since:
                        continue
            # open compressed report file
            with lzma.open(os.path.join(self.args.path, file_name), 'r') as f:
                # iterate lazily instead of readlines() so the whole
                # decompressed report is not loaded into memory at once
                for line in f:
                    self.parse_report(json.loads(line))
        self.process_data()

    def parse_report(self, data):
        """Consume one decoded report (dict). Must be overridden."""
        raise NotImplementedError()

    def process_data(self):
        """Aggregate and output the collected data. Must be overridden."""
        raise NotImplementedError()

    @staticmethod
    def fail(msg=""):
        """Print an error message to stderr and exit with status 1."""
        sys.stderr.write("Error: %s\n" % msg)
        sys.exit(1)
def date(date_str):
    """Parse a YYYY-MM-DD command-line string into a datetime object."""
    fmt = "%Y-%m-%d"
    return datetime.strptime(date_str, fmt)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment