SHARE
TWEET

Untitled

a guest Nov 10th, 2019 128 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3.  
  4. import pandas
  5. import datetime
  6. import math
  7. import os
  8.  
  9. #
  10. # HOW TO USE THIS PYTHON SCRIPT:
  11. # 0) this might only work on macOS / linux due to hardcoded file paths (sorry)
  12. # 1) install the pandas module (using pip, for example)
  13. # 2) obtain the following tables from the iron march database in csv format:
  14. #       forums_forums
  15. #       forums_topics
  16. #       forums_posts
  17. #
  18. #       core_message_posts
  19. #       core_members
  20. #       core_message_topics
  21. #
  22. #     The last three are included in the torrent file:
  23. #       magnet:?xt=urn:btih:f745eb1b86eb55f638517654c015fcaaadc96919&dn=iron_march_201911&tr=http%3a%2f%2fbt1.archive.org%3a6969%2fannounce&tr=http%3a%2f%2fbt2.archive.org%3a6969%2fannounce&ws=http%3a%2f%2fia601401.us.archive.org%2f17%2fitems%2f&ws=https%3a%2f%2fia801401.us.archive.org%2f17%2fitems%2f
  24. #
  25. #      The first three are not but
  26. #      they can be obtained by logging in to the DB hosted here:
  27. #           https://colgate.ankin.info/dbstuffs/index.php
  28. #           user: iron, pwd: march
  29. #      and exporting the tables forums_forums, forums_topics, forums_posts
  30. #      as CSV.
  31. #
  32. #      It is essential to include the column names as first row of CSV when
  33. #      exporting.
  34. #      To do this choose "custom" export method in the Export tab for each
  35. #      table,
  36. #      choose format "CSV",and check "Put column names in the first row".
  37. #
  38. # 3)    Put all six CSV files in a directory called "csv" in the
  39. #       same directory as this script.
  40. #       The directory structure should be as follows:
  41. #
  42. #       ironmarch.py
  43. #       csv/
  44. #       ├── forums_forums.csv
  45. #       ├── forums_topics.csv
  46. #       ├── forums_posts.csv
  47. #       ├── core_message_posts.csv
  48. #       ├── core_members.csv
  49. #       ├── core_message_topics.csv
  50. #
  51. #
  52. # 4)    Now run the ironmarch.py script.
  53. #       It will take a couple of minutes and generate a directory called
  54. #       "ironmarch" containing about 550 MB of static HTML.
  55. #
  56. # 5)    Open ironmarch/index.html in your browser.
  57. #
  58.  
  59.  
  60.  
  61.  
  62.  
  63. print("Loading csv files...")
  64.  
  65. forums_forums = pandas.read_csv('csv/forums_forums.csv')
  66.  
  67. forums_topics = pandas.read_csv('csv/forums_topics.csv')
  68.  
  69. forums_posts = pandas.read_csv('csv/forums_posts.csv')
  70.  
  71. messages = pandas.read_csv('csv/core_message_posts.csv')
  72.  
  73. members = pandas.read_csv('csv/core_members.csv')
  74.  
  75. message_topics = pandas.read_csv('csv/core_message_topics.csv')
  76.  
  77.  
  78. # join member info to posts
  79.  
  80. forums_posts = pandas.merge(forums_posts,
  81.                             members[['member_id',
  82.                                      'email',
  83.                                      'name',
  84.                                      'member_title']],
  85.                             how='left',
  86.                             left_on='author_id',
  87.                             right_on='member_id')
  88.  
  89. forums_posts = pandas.merge(forums_posts, forums_topics[['tid', 'title']],
  90.                             how='left', left_on='topic_id', right_on='tid')
  91.  
  92. joined = pandas.merge(messages[['msg_id', 'msg_topic_id', 'msg_date',
  93.                                 'msg_post', 'msg_author_id', 'msg_ip_address']],
  94.                        members[['member_id', 'name', 'email',
  95.                                 'ip_address', 'member_title']],
  96.                        how='left',
  97.                        left_on='msg_author_id', right_on='member_id')
  98.  
  99. joined = pandas.merge(joined, message_topics[['mt_id', 'mt_title']],
  100.                       how='left',
  101.                       left_on='msg_topic_id', right_on='mt_id')
  102.  
  103. message_topics = pandas.merge(message_topics, members[['member_id', 'name']],
  104.                               how='left',
  105.                               left_on='mt_starter_id', right_on='member_id')
  106. message_topics = message_topics.rename(columns={'name': 'started_by_name'})
  107.  
  108. message_topics = pandas.merge(message_topics, members[['member_id', 'name']],
  109.                               how='left',
  110.                               left_on='mt_to_member_id', right_on='member_id')
  111.  
  112. message_topics = message_topics.rename(columns={'name': 'to_member_name'})
  113.  
  114.  
  115.  
  116.  
# Opening markup shared by every generated page: doctype, UTF-8 charset
# declaration, an inline stylesheet for tables and images, and the opening
# <body> tag.  The code that writes a page appends the closing
# </body></html> itself.
preamble = """<!DOCTYPE html><html><head><meta charset="UTF-8"><style>table {
 border-collapse: collapse;
}

td {
   vertical-align:top;
}

table {
   width: 100%;
   max-width: 1000px;
}

img {
   max-width: 500px;
}

table, th, td {
 border: 1px solid black;
 valign: top;
}</style></head><body>"""
  138.  
  139.  
  140.  
  141.  
  142. for d in ['ironmarch', 'ironmarch/members', 'ironmarch/topics',
  143.           'ironmarch/messages', 'ironmarch/forums']:
  144.     try:
  145.         os.mkdir(d)
  146.     except FileExistsError:
  147.         print("directory {} already exists".format(d))
  148.  
  149.  
  150.  
  151.  
  152. def dt(ts):
  153.     if pandas.isnull(ts):
  154.         return ''
  155.     else:
  156.         return datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
  157.  
  158.  
  159.  
  160. print("Processing public forum data...")
  161.  
  162. site_index = open('ironmarch/index.html', 'w')
  163. site_index.write(preamble)
  164. site_index.write('<h1>Forum list</h1>')
  165. site_index.write("""<h3><a href="members.html">Member list</a>;
  166.                    <a href="message_topics.html">PM topic list</a></h3>""")
  167. site_index.write("""<table>
  168. <tr>
  169.   <th>id</th>
  170.   <th>name_seo</th>
  171.   <th>last post</th>
  172.   <th># topics</th>
  173.   <th># posts</th>
  174. </tr>""")
  175. for u, f in forums_forums.iterrows():
  176.     site_index.write('<tr>')
  177.     site_index.write("""<td>{}</td>
  178. <td><a href="forums/forum_{}.html">{}</a>
  179. </td><td>{}</td><td>{}</td><td>{}</td>""".format(f['id'], f['id'],
  180.                                                  f['name_seo'],                                                                                     dt(f['last_post']),
  181.                                                  f['topics'], f['posts']))
  182.     site_index.write('</td></tr>')
  183.  
  184.     forum_file = open('ironmarch/forums/forum_{}.html'.format(f['id']), 'w')
  185.     forum_file.write(preamble)
  186.     forum_file.write('<h1>Forum: {}</h1><table>'.format(f['name_seo']))
  187.     forum_file.write("""<table>
  188.                         <tr>
  189.                           <th>id</th>
  190.                           <th>title</th>
  191.                           <th>last post</th>
  192.                           <th># posts</th>
  193.                         </tr>""")
  194.  
  195.     for i, t in forums_topics[forums_topics['forum_id'] == f['id']].iterrows():
  196.         forum_file.write('<tr>')
  197.         forum_file.write('<td>{}</td>'.format(t['tid']))
  198.         forum_file.write("""<td>
  199. <a href="../topics/topic_{}.html">{}</a></td>""".format(t['tid'], t['title']))
  200.         forum_file.write('<td>{}</td>'.format(dt(t['last_post'])))
  201.         forum_file.write('<td>{}</td>'.format(t['posts']))
  202.         forum_file.write('</tr>')
  203.  
  204.         topic_file = open('ironmarch/topics/topic_{}.html'.format(t['tid']),
  205.                           'w')
  206.         topic_file.write(preamble)
  207.         topic_file.write('<h1>{}</h1><table>'.format(t['title']))
  208.         for j, p in (forums_posts[forums_posts['topic_id'] == t['tid']]
  209.                      .sort_values('post_date').iterrows()):
  210.             topic_file.write("<tr>")
  211.             topic_file.write("<td width='30%'>")
  212.             topic_file.write("date: "+dt(p['post_date'])+"<br>")
  213.             topic_file.write("author member name: "
  214.                              "<a href='../members/member_{}.html'>{}</a>"
  215.                              "<br>".format(p['author_id'], p['name']))
  216.             topic_file.write("author member email: {}<br>".format(p['email']))
  217.             topic_file.write("author member title:"
  218.                              " {}<br>".format(p['member_title']))
  219.             topic_file.write("ip address: {}<br>".format(p['ip_address']))
  220.             topic_file.write("</td>")
  221.             topic_file.write("<td>")
  222.             topic_file.write(str(p['post']))
  223.             topic_file.write("</td>")
  224.             topic_file.write("</tr>")
  225.         topic_file.write('</table></body></html>')
  226.         topic_file.close()
  227.     forum_file.write('</table></body></html>')
  228.     forum_file.close()
  229. site_index.write('</table></body></html>')
  230. site_index.close()
  231.  
  232.  
  233. print("Processing PM data...")
  234.  
  235.  
  236. forum_file = open('ironmarch/message_topics.html', 'w')
  237. forum_file.write(preamble)
  238. forum_file.write('<table>')
  239. forum_file.write("""<table>
  240. <tr>
  241.   <th>date</th>
  242.   <th>title</th>
  243.   <th>started by</th>
  244.   <th>to</th>
  245. </tr>""")
  246. for i, t in message_topics.sort_values('mt_date').iterrows():
  247.     forum_file.write('<tr>')
  248.     forum_file.write('<td>{}</td>'.format(dt(t['mt_date'])))
  249.     forum_file.write('<td><a href="messages/message_topic_{}.html">{}</a>'
  250.                      '</td>'.format(t['mt_id'], t['mt_title']))
  251.     forum_file.write('<td><a href="members/member_{}.html">{}'
  252.                      '</td>'.format(t['mt_starter_id'], t['started_by_name']))
  253.     forum_file.write('<td><a href="members/member_{}.html">{}'
  254.                      '</td>'.format(t['mt_to_member_id'], t['to_member_name']))
  255.     forum_file.write('</tr>')
  256.  
  257.     topic_file = open('ironmarch/messages/'
  258.                       'message_topic_{}.html'.format(t['mt_id']), 'w')
  259.     topic_file.write(preamble)
  260.     topic_file.write('<h1>PM topic: {}</h1><table>'.format(t['mt_title']))
  261.     for j, p in (joined[joined['msg_topic_id'] == t['mt_id']]
  262.                  .sort_values('msg_date').iterrows()):
  263.         topic_file.write("<tr>")
  264.         topic_file.write("<td width='30%'>")
  265.         topic_file.write("date: "+dt(p['msg_date'])+"<br>")
  266.         topic_file.write("author member name: "
  267.                          "<a href='../members/member_{}.html'>{}</a>"
  268.                          "<br>".format(p['msg_author_id'], p['name']))
  269.         topic_file.write("author member email: {}<br>".format(p['email']))
  270.         topic_file.write("author member title: {}"
  271.                          "<br>".format(p['member_title']))
  272.         topic_file.write("msg ip address: {}<br>".format(p['msg_ip_address']))
  273.         topic_file.write("</td>")
  274.         topic_file.write("<td>")
  275.         topic_file.write(str(p['msg_post']))
  276.         topic_file.write("</td>")
  277.         topic_file.write("</tr>")
  278.     topic_file.write('</table></body></html>')
  279.     topic_file.close()
  280. forum_file.write('</table></body></html>')
  281. forum_file.close()
  282.  
  283.  
  284.  
  285.  
  286. MEMBER_FIELDS_TO_DISPLAY = ['member_id', 'email', 'joined', 'ip_address',
  287.        'allow_admin_mails', 'skin', 'warn_level', 'warn_lastwarn', 'language',
  288.        'restrict_post', 'bday_day', 'bday_month', 'bday_year', 'msg_count_new',
  289.        'msg_count_total', 'msg_count_reset', 'msg_show_notification', 'misc',
  290.        'last_visit', 'last_activity', 'mod_posts', 'auto_track', 'temp_ban',
  291.        'mgroup_others', 'member_login_key_expire', 'has_blog', 'has_gallery',
  292.        'members_seo_name', 'members_cache', 'members_disable_pm',
  293.        'failed_logins', 'failed_login_count', 'members_profile_views',
  294.        'members_pass_hash', 'members_pass_salt', 'members_bitoptions',
  295.        'fb_uid', 'members_day_posts', 'live_id', 'twitter_id', 'twitter_token',
  296.        'twitter_secret', 'notification_cnt', 'fb_token', 'blogs_recache',
  297.        'ipsconnect_id', 'ipsconnect_revalidate_url', 'google_id',
  298.        'linkedin_id', 'pp_last_visitors', 'pp_main_photo', 'pp_main_width',
  299.        'pp_main_height', 'pp_thumb_photo', 'pp_thumb_width', 'pp_thumb_height',
  300.        'pp_setting_count_comments', 'pp_reputation_points', 'pp_gravatar',
  301.        'pp_photo_type', 'signature', 'pconversation_filters', 'fb_photo',
  302.        'fb_photo_thumb', 'fb_bwoptions', 'tc_last_sid_import', 'tc_photo',
  303.        'tc_bwoptions', 'pp_customization', 'timezone', 'pp_cover_photo',
  304.        'profilesync', 'profilesync_lastsync', 'google_token', 'linkedin_token',
  305.        'live_token', 'members_bitoptions2', 'create_menu', 'marked_site_read',
  306.        'pp_cover_offset', 'acp_skin', 'acp_language', 'member_title',
  307.        'member_posts', 'member_last_post', 'member_streams',
  308.        'photo_last_update', 'pp_setting_count_visitors', 'pp_xbox']
  309.  
  310.  
  311. print("Processing member data...")
  312.  
  313. MEMBER_POSTS_PER_PAGE = 100
  314.  
  315. member_index = open('ironmarch/members.html', 'w')
  316. member_index.write(preamble)
  317. member_index.write("""<table>
  318. <tr>
  319.   <th>member_id</th>
  320.   <th>name</th>
  321.   <th>num posts</th>
  322.   <th>PM link</th>
  323.   <th>email</th>
  324. </tr>""")
  325. for i, m in members.iterrows():
  326.     member_index.write('<tr>')
  327.     member_index.write('<td>{}</td>'.format(m['member_id']))
  328.     member_index.write('<td><a href="members/member_{}.html">{}</a>'
  329.                        '</td>'.format(m['member_id'], m['name']))
  330.     member_index.write('<td><a href="members/member_{}_posts_1.html">{}</a>'
  331.                        '</td>'.format(m['member_id'], m['member_posts']))
  332.     member_index.write('<td><a href="members/member_{}_messages_1.html">PMs</a>'
  333.                        '</td>'.format(m['member_id']))
  334.     member_index.write('<td>{}</td>'.format(m['email']))
  335.     member_index.write('</tr>')
  336.  
  337.     member_file = open('ironmarch/members'
  338.                        '/member_{}.html'.format(m['member_id']), 'w')
  339.     member_file.write(preamble)
  340.     member_file.write('<h1>Member: {}</h1>'.format(m['name']))
  341.     member_file.write('<h2><a href="member_{}_posts_1.html">'
  342.                       'All forum posts</a></h2>'.format(m['member_id']))
  343.     member_file.write('<h2><a href="member_{}_messages_1.html">'
  344.                       'All messages</a></h2>'.format(m['member_id']))
  345.     member_file.write('<table>')
  346.     for field in MEMBER_FIELDS_TO_DISPLAY:
  347.         member_file.write('<tr><td>{}</td><td>{}</td>'
  348.                           '</td>'.format(field, m[field]))
  349.     member_file.write('</table>')
  350.     member_file.write('</body></html>')
  351.  
  352.     # MEMBER FORUM POSTS
  353.  
  354.     counter = 0
  355.     page_count = 1
  356.  
  357.     page_links = ""
  358.     n_pages = math.ceil(len(forums_posts[forums_posts['author_id']
  359.                         == m['member_id']])/MEMBER_POSTS_PER_PAGE)
  360.     for i in range(1, n_pages+1):
  361.         page_links += ("<a href='member_{}_posts_{}.html'>{}"
  362.                       "</a> ".format(m['member_id'], i, i))
  363.  
  364.     member_file = open('ironmarch/members/'
  365.                        'member_{}_posts_{}.html'.format(m['member_id'],
  366.                                                         page_count),
  367.                        'w')
  368.     member_file.write(preamble)
  369.     member_file.write('<h1>Forums posts of member {} : page {}'
  370.                       '</h1>'.format(m['name'], page_count))
  371.     member_file.write('Go to page: ' + page_links + '<br><table>')
  372.  
  373.     for j, p in (forums_posts[forums_posts['author_id'] ==
  374.                  m['member_id']].sort_values('post_date').iterrows()):
  375.         member_file.write("<tr>")
  376.         member_file.write("<td width='30%'>")
  377.         member_file.write("date: {}<br>".format(dt(p['post_date'])))
  378.         member_file.write("topic: <a href='../topics/topic_{}.html'>{}</a>"
  379.                           "<br>".format(p['topic_id'], p['title']))
  380.         member_file.write("ip address: {}<br>".format(p['ip_address']))
  381.         member_file.write("</td>")
  382.         member_file.write("<td>")
  383.         member_file.write(str(p['post']))
  384.         member_file.write("</td>")
  385.         member_file.write("</tr>")
  386.         counter = counter+1
  387.         if counter > MEMBER_POSTS_PER_PAGE:
  388.             page_count = page_count + 1
  389.  
  390.             member_file.write('</table>')
  391.             member_file.write('<a href="member_{}_posts_{}.html">'
  392.                               'Next page</a>'.format(m['member_id'],
  393.                                                      page_count))
  394.             member_file.write('</table></body></html>')
  395.             member_file.close()
  396.  
  397.             member_file = open('ironmarch/members/'
  398.                                'member_{}_posts_{}.html'.format(m['member_id'],
  399.                                                                 page_count),
  400.                                'w')
  401.             member_file.write(preamble)
  402.             member_file.write('<h1>Forums posts of member {} :'
  403.                               'page {}</h1>'.format(m['name'], page_count))
  404.             member_file.write('Go to page: ' + page_links + '<br><table>')
  405.  
  406.             counter = 0
  407.  
  408.     if not member_file.closed:
  409.         member_file.write('</table></body></html>')
  410.         member_file.close()
  411.  
  412.     # MEMBER MESSAGES
  413.  
  414.     counter = 0
  415.     page_count = 1
  416.  
  417.     page_links = ""
  418.     n_pages = math.ceil(len(joined[joined['msg_author_id'] ==
  419.                         m['member_id']])/MEMBER_POSTS_PER_PAGE)
  420.     for i in range(1, n_pages+1):
  421.         page_links += ("<a href='member_{}_messages_{}.html'>{}"
  422.                        "</a> ".format(m['member_id'], i, i))
  423.  
  424.     member_file = open('ironmarch/members/'
  425.                        'member_{}_messages_{}.html'.format(m['member_id'],
  426.                                                            page_count),
  427.                        'w')
  428.     member_file.write(preamble)
  429.     member_file.write('<h1>PMs of member {} : '
  430.                       'page {}</h1>'.format(m['name'], page_count))
  431.     member_file.write('Go to page: ' + page_links + '<br><table>')
  432.  
  433.     for j, p in (joined[joined['msg_author_id'] ==
  434.                         m['member_id']].sort_values('msg_date').iterrows()):
  435.         member_file.write("<tr>")
  436.         member_file.write("<td width='30%'>")
  437.         member_file.write("date: {}<br>".format(dt(p['msg_date'])))
  438.         member_file.write("topic: <a href='../messages/message_topic_{}.html'>"
  439.                           "{}</a><br>".format(p['msg_topic_id'], p['mt_title']))
  440.         member_file.write("ip address: {}<br>".format(p['ip_address']))
  441.         member_file.write("</td>")
  442.         member_file.write("<td>")
  443.         member_file.write(str(p['msg_post']))
  444.         member_file.write("</td>")
  445.         member_file.write("</tr>")
  446.         counter = counter+1
  447.         if counter > MEMBER_POSTS_PER_PAGE:
  448.             page_count = page_count + 1
  449.  
  450.             member_file.write('</table>')
  451.             member_file.write('<a href="member_{}_messages_{}.html">'
  452.                               'Next page</a>'.format(m['member_id'],
  453.                                                      page_count))
  454.             member_file.write('</table></body></html>')
  455.             member_file.close()
  456.  
  457.             member_file = open('ironmarch/members/member_{}_messages_{}'
  458.                                '.html'.format(m['member_id'], page_count), 'w')
  459.             member_file.write(preamble)
  460.             member_file.write('<h1>Messages of member {} :'
  461.                               'page {}</h1>'.format(m['name'], page_count))
  462.             member_file.write('Go to page: ' + page_links + '<br><table>')
  463.  
  464.             counter = 0
  465.  
  466.     if not member_file.closed:
  467.         member_file.write('</table></body></html>')
  468.         member_file.close()
  469.  
  470. member_index.write('</table></body></html>')
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top