################################################################################
# 2014-06-11
# This file contains the instructions to generate the files with
# Python 2.7 and Mathematica.
################################################################################

################################################################################
# Mathematica
################################################################################

# Put the files
# - dblp_top_clean.nb
# - dblp_bias_stat_clean.nb
# - A_D_Proof.nb
# - Vcolor_simple_v1.m
# as well as
# - dblp_edges_maxAuthor20_mentee_frequent_known_degree_v1.csv
# - dblp_edges_maxAuthor20_simple_degree_1000_edges_v1.csv
# - dblp_nodes_maxAuthor20_simple_degree_1000_nodes_v1.csv
# - dblp_id_gender_dict_v1.csv
# in the same directory and open the notebooks.

################################################################################
# Python
################################################################################

# Unzip ./data/dblp-2013-12-23.zip to ./data/dblp-2013-12-23.xml and
# run the following command-line instructions from this directory:

# "Parse XML and create the (multi) collaboration graph for all publications with at most 20 co-authors"
python ./code/parse_DBLP_xml.py './data/dblp-2013-12-23.xml' './output/dblp_edges_maxAuthor20.csv' './output/dblp_id_name.csv' 20

# "Compute (simple) co-author graph from collaboration (multi) graph"
python ./code/convert_to_simple_graph.py './output/dblp_edges_maxAuthor20.csv'

# "Create (sorted) id degree list from (simple) co-author graph"
python ./code/create_id_degree_file.py './output/dblp_edges_maxAuthor20_simple.csv'

# "Extract top node list of authors with known gender and induced subgraph of co-authors"
python ./code/extract_top_graph.py './data/dblp_id_gender_dict_v0.dict' './output/dblp_edges_maxAuthor20_simple.csv' './output/dblp_edges_maxAuthor20_simple_id_degree.csv'
python ./code/extract_top_graph.py './data/dblp_id_gender_dict_v1.dict' './output/dblp_edges_maxAuthor20_simple.csv' './output/dblp_edges_maxAuthor20_simple_id_degree.csv'

# "Compute mentor/mentee"
python ./code/create_mentor_file.py './output/dblp_edges_maxAuthor20.csv' './output/dblp_edges_maxAuthor20_mentee_frequent.csv'

# "Compute known mentoring degree"
python ./code/compute_known_degree.py './data/dblp_id_gender_dict_v0.dict' './output/dblp_edges_maxAuthor20_mentee_frequent.csv'
python ./code/compute_known_degree.py './data/dblp_id_gender_dict_v1.dict' './output/dblp_edges_maxAuthor20_mentee_frequent.csv'