Files
edx-platform/fixtures/anonymize_fixtures.py
2012-05-08 10:19:23 -04:00

99 lines
2.9 KiB
Python
Executable File

#! /usr/bin/env python
import sys
import json
import random
import copy
from collections import defaultdict
from argparse import ArgumentParser, FileType
from datetime import datetime
def generate_user(user_number):
return {
"pk": user_number,
"model": "auth.user",
"fields": {
"status": "w",
"last_name": "Last",
"gold": 0,
"is_staff": False,
"user_permissions": [],
"interesting_tags": "",
"email_key": None,
"date_joined": "2012-04-26 11:36:39",
"first_name": "",
"email_isvalid": False,
"avatar_type": "n",
"website": "",
"is_superuser": False,
"date_of_birth": None,
"last_login": "2012-04-26 11:36:48",
"location": "",
"new_response_count": 0,
"email": "user{num}@example.com".format(num=user_number),
"username": "user{num}".format(num=user_number),
"is_active": True,
"consecutive_days_visit_count": 0,
"email_tag_filter_strategy": 1,
"groups": [],
"password": "sha1$90e6f$562a1d783a0c47ce06ebf96b8c58123a0671bbf0",
"silver": 0,
"bronze": 0,
"questions_per_page": 10,
"about": "",
"show_country": True,
"country": "",
"display_tag_filter_strategy": 0,
"seen_response_count": 0,
"real_name": "",
"ignored_tags": "",
"reputation": 1,
"gravatar": "366d981a10116969c568a18ee090f44c",
"last_seen": "2012-04-26 11:36:39"
}
}
def parse_args(args=sys.argv[1:]):
parser = ArgumentParser()
parser.add_argument('-d', '--data', type=FileType('r'), default=sys.stdin)
parser.add_argument('-o', '--output', type=FileType('w'), default=sys.stdout)
parser.add_argument('count', type=int)
return parser.parse_args(args)
def main(args=sys.argv[1:]):
args = parse_args(args)
data = json.load(args.data)
unique_students = set(entry['fields']['student'] for entry in data)
if args.count > len(unique_students) * 0.1:
raise Exception("Can't be sufficiently anonymous selecting {count} of {unique} students".format(
count=args.count, unique=len(unique_students)))
by_problems = defaultdict(list)
for entry in data:
by_problems[entry['fields']['module_id']].append(entry)
out_data = []
out_pk = 1
for name, answers in by_problems.items():
for student_id in xrange(args.count):
sample = random.choice(answers)
data = copy.deepcopy(sample)
data["fields"]["student"] = student_id + 1
data["fields"]["created"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
data["fields"]["modified"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
data["pk"] = out_pk
out_pk += 1
out_data.append(data)
for student_id in xrange(args.count):
out_data.append(generate_user(student_id))
json.dump(out_data, args.output, indent=2)
if __name__ == "__main__":
sys.exit(main())