Browse Source

Adds career-length data and switches plotting from matplotlib to Bokeh

Caleb Fangmeier 6 years ago
parent
commit
df0b0b2798
9 changed files with 665 additions and 47 deletions
  1. 1 0
      .idea/.name
  2. 11 0
      .idea/US_Congress_Demographics.iml
  3. 4 0
      .idea/misc.xml
  4. 8 0
      .idea/modules.xml
  5. 6 0
      .idea/vcs.xml
  6. 469 0
      .idea/workspace.xml
  7. 153 43
      analyze.py
  8. 13 4
      get_congress_data.py
  9. BIN
      us_congress_members.sqlite3

+ 1 - 0
.idea/.name

@@ -0,0 +1 @@
+US_Congress_Demographics

+ 11 - 0
.idea/US_Congress_Demographics.iml

@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+  </component>
+</module>

+ 4 - 0
.idea/misc.xml

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (US_Congress_Domographics)" project-jdk-type="Python SDK" />
+</project>

+ 8 - 0
.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/US_Congress_Demographics.iml" filepath="$PROJECT_DIR$/.idea/US_Congress_Demographics.iml" />
+    </modules>
+  </component>
+</project>

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

+ 469 - 0
.idea/workspace.xml

@@ -0,0 +1,469 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ChangeListManager">
+    <list default="true" id="12a2f774-8336-4b7d-95c4-377921e0ee2b" name="Default" comment="">
+      <change beforePath="" afterPath="$PROJECT_DIR$/.idea/vcs.xml" />
+      <change beforePath="$PROJECT_DIR$/analyze.py" afterPath="$PROJECT_DIR$/analyze.py" />
+      <change beforePath="$PROJECT_DIR$/get_congress_data.py" afterPath="$PROJECT_DIR$/get_congress_data.py" />
+      <change beforePath="$PROJECT_DIR$/us_congress_members.sqlite3" afterPath="$PROJECT_DIR$/us_congress_members.sqlite3" />
+    </list>
+    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
+    <option name="TRACKING_ENABLED" value="true" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FileEditorManager">
+    <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
+      <file leaf-file-name="get_congress_data.py" pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/get_congress_data.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="1188">
+              <caret line="66" column="31" lean-forward="false" selection-start-line="66" selection-start-column="31" selection-end-line="66" selection-end-column="31" />
+              <folding />
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file leaf-file-name="analyze.py" pinned="false" current-in-tab="true">
+        <entry file="file://$PROJECT_DIR$/analyze.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="864">
+              <caret line="211" column="0" lean-forward="false" selection-start-line="211" selection-start-column="0" selection-end-line="211" selection-end-column="0" />
+              <folding>
+                <element signature="e#0#19#0" expanded="true" />
+                <element signature="e#472#776#0" expanded="false" />
+                <element signature="e#809#1262#0" expanded="false" />
+                <element signature="e#1336#1866#0" expanded="false" />
+                <element signature="e#1959#2390#0" expanded="false" />
+                <element signature="e#2448#2919#0" expanded="false" />
+                <element signature="e#2958#4952#0" expanded="false" />
+                <element signature="e#5006#6303#0" expanded="false" />
+                <element signature="e#6356#7707#0" expanded="false" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+    </leaf>
+  </component>
+  <component name="FileTemplateManagerImpl">
+    <option name="RECENT_TEMPLATES">
+      <list>
+        <option value="Jupyter Notebook" />
+        <option value="Python Script" />
+      </list>
+    </option>
+  </component>
+  <component name="Git.Settings">
+    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
+  </component>
+  <component name="IdeDocumentHistory">
+    <option name="CHANGED_PATHS">
+      <list>
+        <option value="$PROJECT_DIR$/load_data.py" />
+        <option value="$PROJECT_DIR$/US_Congress_Demographics.ipynb" />
+        <option value="$PROJECT_DIR$/get_data.py" />
+        <option value="$PROJECT_DIR$/get_data.py" />
+        <option value="$PROJECT_DIR$/get_congress_data.py" />
+        <option value="$PROJECT_DIR$/analyze.py" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectFrameBounds">
+    <option name="x" value="1925" />
+    <option name="y" value="27" />
+    <option name="width" value="1915" />
+    <option name="height" value="2133" />
+  </component>
+  <component name="ProjectLevelVcsManager" settingsEditedManually="true" />
+  <component name="ProjectView">
+    <navigator currentView="ProjectPane" proportions="" version="1">
+      <flattenPackages />
+      <showMembers />
+      <showModules />
+      <showLibraryContents />
+      <hideEmptyPackages />
+      <abbreviatePackageNames />
+      <autoscrollToSource />
+      <autoscrollFromSource />
+      <sortByType />
+      <manualOrder />
+      <foldersAlwaysOnTop value="true" />
+    </navigator>
+    <panes>
+      <pane id="Scope" />
+      <pane id="Scratches" />
+      <pane id="ProjectPane">
+        <subPane>
+          <expand>
+            <path>
+              <item name="US_Congress_Demographics" type="b2602c69:ProjectViewProjectNode" />
+              <item name="US_Congress_Demographics" type="462c0819:PsiDirectoryNode" />
+            </path>
+          </expand>
+          <select />
+        </subPane>
+      </pane>
+    </panes>
+  </component>
+  <component name="PropertiesComponent">
+    <property name="settings.editor.selected.configurable" value="com.jetbrains.python.documentation.PythonDocumentationConfigurable" />
+    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
+  </component>
+  <component name="RunDashboard">
+    <option name="ruleStates">
+      <list>
+        <RuleState>
+          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
+        </RuleState>
+        <RuleState>
+          <option name="name" value="StatusDashboardGroupingRule" />
+        </RuleState>
+      </list>
+    </option>
+  </component>
+  <component name="RunManager" selected="Python.analyze">
+    <configuration name="analyze" type="PythonConfigurationType" factoryName="Python" temporary="true">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="US_Congress_Demographics" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/analyze.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+    </configuration>
+    <configuration name="get_congress_data" type="PythonConfigurationType" factoryName="Python" temporary="true">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="US_Congress_Demographics" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/get_congress_data.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+    </configuration>
+    <configuration name="get_data" type="PythonConfigurationType" factoryName="Python" temporary="true">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="US_Congress_Domographics" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/get_data.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+    </configuration>
+    <configuration default="true" type="Tox" factoryName="Tox">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="US_Congress_Domographics" />
+    </configuration>
+    <configuration default="true" type="tests" factoryName="Doctests">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="US_Congress_Domographics" />
+      <option name="SCRIPT_NAME" value="" />
+      <option name="CLASS_NAME" value="" />
+      <option name="METHOD_NAME" value="" />
+      <option name="FOLDER_NAME" value="" />
+      <option name="TEST_TYPE" value="TEST_SCRIPT" />
+      <option name="PATTERN" value="" />
+      <option name="USE_PATTERN" value="false" />
+    </configuration>
+    <configuration default="true" type="tests" factoryName="Nosetests">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="US_Congress_Domographics" />
+      <option name="_new_regexPattern" value="&quot;&quot;" />
+      <option name="_new_additionalArguments" value="&quot;&quot;" />
+      <option name="_new_target" value="&quot;&quot;" />
+      <option name="_new_targetType" value="&quot;PATH&quot;" />
+    </configuration>
+    <configuration default="true" type="tests" factoryName="Twisted Trial">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="US_Congress_Domographics" />
+      <option name="_new_additionalArguments" value="&quot;&quot;" />
+      <option name="_new_target" value="&quot;&quot;" />
+      <option name="_new_targetType" value="&quot;PATH&quot;" />
+    </configuration>
+    <configuration default="true" type="tests" factoryName="Unittests">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="US_Congress_Domographics" />
+      <option name="_new_additionalArguments" value="&quot;&quot;" />
+      <option name="_new_target" value="&quot;&quot;" />
+      <option name="_new_targetType" value="&quot;PATH&quot;" />
+    </configuration>
+    <configuration default="true" type="tests" factoryName="py.test">
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs />
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <module name="US_Congress_Domographics" />
+      <option name="_new_keywords" value="&quot;&quot;" />
+      <option name="_new_additionalArguments" value="&quot;&quot;" />
+      <option name="_new_target" value="&quot;&quot;" />
+      <option name="_new_targetType" value="&quot;PATH&quot;" />
+    </configuration>
+    <list size="3">
+      <item index="0" class="java.lang.String" itemvalue="Python.get_data" />
+      <item index="1" class="java.lang.String" itemvalue="Python.analyze" />
+      <item index="2" class="java.lang.String" itemvalue="Python.get_congress_data" />
+    </list>
+    <recent_temporary>
+      <list size="3">
+        <item index="0" class="java.lang.String" itemvalue="Python.analyze" />
+        <item index="1" class="java.lang.String" itemvalue="Python.get_congress_data" />
+        <item index="2" class="java.lang.String" itemvalue="Python.get_data" />
+      </list>
+    </recent_temporary>
+  </component>
+  <component name="ShelveChangesManager" show_recycled="false">
+    <option name="remove_strategy" value="false" />
+  </component>
+  <component name="SvnConfiguration">
+    <configuration />
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="12a2f774-8336-4b7d-95c4-377921e0ee2b" name="Default" comment="" />
+      <created>1514666705563</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1514666705563</updated>
+    </task>
+    <task id="LOCAL-00001" summary="Commit of routine to scrape congress data as well as process IPUMS Data for a basic plot">
+      <created>1514942026941</created>
+      <option name="number" value="00001" />
+      <option name="presentableId" value="LOCAL-00001" />
+      <option name="project" value="LOCAL" />
+      <updated>1514942026941</updated>
+    </task>
+    <option name="localTasksCounter" value="2" />
+    <servers />
+  </component>
+  <component name="ToolWindowManager">
+    <frame x="1925" y="27" width="1915" height="2133" extended-state="0" />
+    <editor active="true" />
+    <layout>
+      <window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.1535248" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
+      <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
+      <window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.329849" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
+      <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.20810677" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
+      <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
+      <window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
+      <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
+      <window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
+      <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.39990115" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
+      <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
+      <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
+      <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
+      <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
+      <window_info id="Documentation" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32994792" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" x="0" y="0" width="910" height="1961" />
+      <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
+      <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
+      <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
+      <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
+    </layout>
+  </component>
+  <component name="VcsContentAnnotationSettings">
+    <option name="myLimit" value="2678400000" />
+  </component>
+  <component name="VcsManagerConfiguration">
+    <MESSAGE value="Commit of routine to scrape congress data as well as process IPUMS Data for a basic plot" />
+    <option name="LAST_COMMIT_MESSAGE" value="Commit of routine to scrape congress data as well as process IPUMS Data for a basic plot" />
+  </component>
+  <component name="XDebuggerManager">
+    <breakpoint-manager>
+      <breakpoints-dialog>
+        <breakpoints-dialog />
+      </breakpoints-dialog>
+      <option name="time" value="4" />
+    </breakpoint-manager>
+    <watches-manager />
+  </component>
+  <component name="editorHistoryManager">
+    <entry file="file://$PROJECT_DIR$/get_congress_data.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="828">
+          <caret line="46" column="48" lean-forward="false" selection-start-line="46" selection-start-column="48" selection-end-line="46" selection-end-column="48" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/analyze.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="2358">
+          <caret line="150" column="33" lean-forward="false" selection-start-line="150" selection-start-column="33" selection-end-line="150" selection-end-column="33" />
+          <folding>
+            <element signature="e#0#19#0" expanded="true" />
+            <element signature="e#472#776#0" expanded="false" />
+            <element signature="e#809#1262#0" expanded="false" />
+            <element signature="e#1336#1866#0" expanded="false" />
+            <element signature="e#1959#2390#0" expanded="false" />
+            <element signature="e#2448#2919#0" expanded="false" />
+            <element signature="e#2958#4952#0" expanded="false" />
+            <element signature="e#5006#6303#0" expanded="false" />
+            <element signature="e#6356#7707#0" expanded="false" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/get_congress_data.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="0">
+          <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/analyze.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="576">
+          <caret line="34" column="86" lean-forward="false" selection-start-line="34" selection-start-column="86" selection-end-line="34" selection-end-column="86" />
+          <folding>
+            <element signature="e#0#19#0" expanded="true" />
+            <element signature="e#472#776#0" expanded="false" />
+            <element signature="e#809#1262#0" expanded="false" />
+            <element signature="e#1336#1866#0" expanded="false" />
+            <element signature="e#1959#2390#0" expanded="false" />
+            <element signature="e#2448#2919#0" expanded="false" />
+            <element signature="e#2958#4952#0" expanded="false" />
+            <element signature="e#5006#6303#0" expanded="false" />
+            <element signature="e#6356#7707#0" expanded="false" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/get_congress_data.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="0">
+          <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/load_data.py" />
+    <entry file="file://$PROJECT_DIR$/get_congress_data.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="0">
+          <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/load_data.py" />
+    <entry file="file://$PROJECT_DIR$/US_Congress_Demographics.ipynb" />
+    <entry file="file://$PROJECT_DIR$/load_data.py" />
+    <entry file="file://$PROJECT_DIR$/get_congress_data.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="1188">
+          <caret line="66" column="31" lean-forward="false" selection-start-line="66" selection-start-column="31" selection-end-line="66" selection-end-column="31" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file:///usr/lib64/python3.6/json/encoder.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="0">
+          <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
+          <folding />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/analyze.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="864">
+          <caret line="211" column="0" lean-forward="false" selection-start-line="211" selection-start-column="0" selection-end-line="211" selection-end-column="0" />
+          <folding>
+            <element signature="e#0#19#0" expanded="true" />
+            <element signature="e#472#776#0" expanded="false" />
+            <element signature="e#809#1262#0" expanded="false" />
+            <element signature="e#1336#1866#0" expanded="false" />
+            <element signature="e#1959#2390#0" expanded="false" />
+            <element signature="e#2448#2919#0" expanded="false" />
+            <element signature="e#2958#4952#0" expanded="false" />
+            <element signature="e#5006#6303#0" expanded="false" />
+            <element signature="e#6356#7707#0" expanded="false" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+  </component>
+  <component name="masterDetails">
+    <states>
+      <state key="ScopeChooserConfigurable.UI">
+        <settings>
+          <splitter-proportions>
+            <option name="proportions">
+              <list>
+                <option value="0.2" />
+              </list>
+            </option>
+          </splitter-proportions>
+        </settings>
+      </state>
+    </states>
+  </component>
+</project>

+ 153 - 43
analyze.py

@@ -1,15 +1,28 @@
-import matplotlib.pyplot as plt
 import pandas as pd
 import numpy as np
+from bokeh.plotting import figure
+from bokeh.models import HoverTool
+
+binning_age_max = 110
+binning_career_max = 70
+
+
+def save_fig(fig, filename):
+    from bokeh.embed import components
+    script, div = components(fig)
+
+    with open(f'output/{filename}.js', 'w') as f:
+        f.write(script)
+    with open(f'output/{filename}.html', 'w') as f:
+        f.write(div)
 
 
 def percentile_from_pdf(pdf, bin_centers, percentile=0.5):
     cdf = 0
-    for pdf_val, bin_low in zip(pdf, bin_centers[:-1]):
+    for pdf_val, bin_low, bin_high in zip(pdf, bin_centers[:-1], bin_centers[1:]):
+        if cdf+pdf_val > percentile:
+            return bin_low + (percentile - cdf)*(bin_high-bin_low) / pdf_val
         cdf += pdf_val
-        if cdf > percentile:
-            return bin_low  # TODO: Interpolate
-    print(pdf, bin_centers)
     raise ValueError(f"couldn't find percentile: {percentile}, cdf: {cdf}" )
 
 
@@ -26,39 +39,62 @@ def pdf_stats(pdf, bins):
     return Stats((pdf, bins), mean, median, quart_high, quart_low)
 
 
-def get_stats_congress(year, age_max=110, parties=None, states=None):
+def get_congress(year, parties=None, states=None, positions=None):
     query = f'''\
-SELECT yob, position, party FROM Member
-    WHERE congress={year} AND position IN ("Representative", "Senator")
-'''
+SELECT yob, career_length FROM Member
+    WHERE congress={year}'''
+    if positions:
+        query += ' AND position IN (' + ', '.join(f'"{position}"' for position in positions) + ')'
     if parties:
-        query += ' AND party IN (' + ", ".join(f'"{party}"' for party in parties) + ')'
+        query += ' AND party IN (' + ', '.join(f'"{party}"' for party in parties) + ')'
     if states:
-        query += ' AND state IN (' + ", ".join(f'"{state}"' for state in states) + ')'
+        query += ' AND state IN (' + ', '.join(f'"{state}"' for state in states) + ')'
     data = pd.read_sql_query(query, 'sqlite:///us_congress_members.sqlite3')
     data['age'] = year - data.yob
+    return data
 
-    pdf, bins = np.histogram(data.age, bins=age_max, range=(0, age_max), density=True)
-    return pdf_stats(pdf, bins)
 
+def get_stats_congress(year, age_or_term, parties=None, states=None, positions=None):
+    data = get_congress(year, parties, states, positions)
+    if len(data) == 0:
+        return None
 
-def get_stats_genpop(year_data, age_max=110):
-    pdf, bins = np.histogram(year_data.AGE, bins=age_max, range=(0, age_max), weights=year_data.PERWT, density=True)
+    if age_or_term == "Age":
+        pdf, bins = np.histogram(data.age, bins=binning_age_max, range=(0, binning_age_max), density=True)
+    else:
+        pdf, bins = np.histogram(data.career_length, bins=binning_career_max, range=(0, binning_career_max),
+                                 density=True)
     return pdf_stats(pdf, bins)
 
 
-def plot_pdf(genpop_stats, congress_stats):
-    import matplotlib.pyplot as plt
-    genpop_pdf, genpop_bins = genpop_stats.hist
-    congress_pdf, congress_bins = congress_stats.hist
+def get_stats_genpop(year, sex=None, states=None):
+    query = f'''\
+SELECT perwt, age FROM person
+    WHERE year={year}
+'''
+    if sex:
+        query += f' AND sex={sex}'
+    if states:
+        query += f' AND statefip IN (' + ', '.join(f'{state}' for state in states) + ')'
+    data = pd.read_sql_query(query, 'sqlite:///usa_00001.sqlite3')
+    pdf, bins = np.histogram(data.age, bins=binning_age_max, range=(0, binning_age_max), weights=data.perwt,
+                             density=True)
+    return pdf_stats(pdf, bins)
 
-    plt.plot(genpop_bins[:-1], genpop_pdf, 'r.', label='U.S. Population')
-    plt.plot(congress_bins[:-1], congress_pdf, 'b.', label='Congress')
-    plt.legend()
-    plt.show()
 
+def plot_yearly_stats(figname):
+    hover = HoverTool(tooltips=[(f'Age', "@y{00.0}")],
+                      mode='vline')
+    fig = figure(tools=[hover, 'pan', 'wheel_zoom', 'save', 'reset'],
+                 sizing_mode='scale_width', plot_width=700, plot_height=450,
+                 toolbar_location="right")
+    genpop_stats = {}
+    congress_stats = {}
+    for year in [1850, 1860, 1870, 1880, 1900, 1910, 1920, 1930, 1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010, 2016]:
+        genpop_stats[year] = get_stats_genpop(year)
+    for year in range(1850, 2017):
+        congress_stats[year] = get_stats_congress(year, 'Age', positions=['Senator', 'Representative'])
 
-def plot_yearly_stats(congress_stats, genpop_stats):
     congress_years = []
     congress_medians = []
     congress_quart_highs = []
@@ -79,26 +115,100 @@ def plot_yearly_stats(congress_stats, genpop_stats):
         genpop_quart_highs.append(year_stats.quart_high)
         genpop_quart_lows.append(year_stats.quart_low)
 
-    plt.fill_between(genpop_years, genpop_medians, genpop_quart_highs, color='b', alpha=0.3)
-    plt.fill_between(genpop_years, genpop_medians, genpop_quart_lows, color='b', alpha=0.3)
-    plt.plot(genpop_years, genpop_medians, 'b', label='U.S. Population')
-    plt.fill_between(congress_years, congress_medians, congress_quart_highs, color='r', alpha=0.3)
-    plt.fill_between(congress_years, congress_medians, congress_quart_lows, color='r', alpha=0.3)
-    plt.plot(congress_years, congress_medians, 'r', label='Congress')
-    plt.legend()
-    plt.grid()
-    plt.ylabel('Age')
-    plt.xlabel('Year')
-    plt.show()
+    def do_plot(years, medians, quart_highs, quart_lows, color, label):
+        fig.patch(years + years[::-1], quart_highs + quart_lows[::-1], fill_color=color, alpha=0.3)
+        fig.line(years, medians, line_color=color, line_width=2, legend=label, level='overlay')
+
+    do_plot(genpop_years, genpop_medians, genpop_quart_highs, genpop_quart_lows, 'blue', 'U.S. Population')
+    do_plot(congress_years, congress_medians, congress_quart_highs, congress_quart_lows, 'red', 'Congress')
+
+    fig.legend.location = 'bottom_right'
+    fig.xaxis.axis_label = 'Year'
+    fig.yaxis.axis_label = 'Age'
+    fig.y_range.start = 0
+
+    save_fig(fig, figname)
+
+
+def plot_partisan_stats(age_or_term, figname):
+    """Plot the yearly median and quartile band of a statistic for each party.
+
+    For Republicans (red) and Democrats (blue), queries
+    ``get_stats_congress`` for every year from 1850 through 2016, restricted
+    to Senators and Representatives, and draws the median as a line over a
+    translucent band spanning ``quart_low`` to ``quart_high``.
+
+    :param age_or_term: statistic selector forwarded to ``get_stats_congress``;
+        also used as the hover-tooltip label and the y-axis label.
+    :param figname: name forwarded to ``save_fig`` together with the figure.
+    """
+    hover = HoverTool(tooltips=[(f'{age_or_term}', "@y{00.0}")],
+                      mode='vline')
+
+    fig = figure(tools=[hover, 'pan', 'wheel_zoom', 'save', 'reset'],
+                 sizing_mode='scale_width', plot_width=700, plot_height=450,
+                 toolbar_location="right")
+    parties = [('Republican', 'red'), ('Democrat', 'blue')]
+    for (party, color) in parties:
+        stats = {}
+        for year in range(1850, 2017):
+            stat = get_stats_congress(year, age_or_term, parties=[party], positions=['Senator', 'Representative'])
+            # Explicit None check, matching plot_chamber_stats: a truthiness test
+            # could drop a valid-but-falsy result or raise on an array-like one.
+            if stat is not None:
+                stats[year] = stat
+
+        years = list(stats)
+        medians = [year_stats.median for year_stats in stats.values()]
+        quart_highs = [year_stats.quart_high for year_stats in stats.values()]
+        quart_lows = [year_stats.quart_low for year_stats in stats.values()]
+
+        # Band outline: forward along the upper quartile, then back along the
+        # lower quartile so the polygon closes on itself.
+        fig.patch(years + years[::-1], quart_highs + quart_lows[::-1], fill_color=color, alpha=0.3)
+        fig.line(years, medians, line_color=color, line_width=2, legend=party, level='overlay')
+
+    fig.legend.location = 'bottom_right'
+    fig.xaxis.axis_label = 'Year'
+    fig.yaxis.axis_label = age_or_term
+    # Anchor the y axis at zero instead of letting it auto-fit to the data.
+    fig.y_range.start = 0
+
+    save_fig(fig, figname)
+
+
+def plot_chamber_stats(age_or_term, figname):
+    """Plot the yearly median and quartile band of a statistic for each chamber.
+
+    For the Senate (red) and the House (blue), queries ``get_stats_congress``
+    for every year from 1850 through 2016 and draws the median as a line over
+    a translucent band spanning ``quart_low`` to ``quart_high``.  Years for
+    which ``get_stats_congress`` returns ``None`` are left out.
+
+    :param age_or_term: statistic selector forwarded to ``get_stats_congress``;
+        also used as the hover-tooltip label and the y-axis label.
+    :param figname: name forwarded to ``save_fig`` together with the figure.
+    """
+    from bokeh.models import HoverTool
+
+    tooltip = HoverTool(tooltips=[(f'{age_or_term}', "@y{00.0}")], mode='vline')
+    fig = figure(tools=[tooltip, 'pan', 'wheel_zoom', 'save', 'reset'],
+                 sizing_mode='scale_width', plot_width=700, plot_height=450,
+                 toolbar_location="right")
+
+    for role, chamber_name, shade in (('Senator', 'Senate', 'red'), ('Representative', 'House', 'blue')):
+        # Query every year in order, then keep only the years that produced statistics.
+        queried = [(year, get_stats_congress(year, age_or_term, positions=[role]))
+                   for year in range(1850, 2017)]
+        kept = [(year, stat) for year, stat in queried if stat is not None]
+
+        years = [year for year, _ in kept]
+        medians = [stat.median for _, stat in kept]
+        upper = [stat.quart_high for _, stat in kept]
+        lower = [stat.quart_low for _, stat in kept]
+
+        # Band outline: forward along the upper quartile, then back along the
+        # lower quartile so the polygon closes on itself.
+        fig.patch(years + years[::-1], upper + lower[::-1], fill_color=shade, alpha=0.3)
+        fig.line(years, medians, line_color=shade, line_width=2, legend=chamber_name, level='overlay')
+
+    fig.legend.location = 'bottom_right'
+    fig.xaxis.axis_label = 'Year'
+    fig.yaxis.axis_label = age_or_term
+    fig.y_range.start = 0
+
+    save_fig(fig, figname)
 
 
 if __name__ == '__main__':
-    stats_genpop = {}
-    stats_congress = {}
-    for year in range(1850, 2017):
-        stats_congress[year] = get_stats_congress(year)
-    people = pd.read_csv('usa_00001.csv', usecols=['YEAR', 'AGE', 'PERWT'], index_col='YEAR')
-    for year in people.index.unique():
-        stats_genpop[year] = get_stats_genpop(people.loc[year])
-    plot_yearly_stats(stats_congress, stats_genpop)
+    # Each call builds one figure and passes it to save_fig under the given name.
+    # Congress vs. general-population age by year.
+    plot_yearly_stats('congress_ages')
+    # Senate vs. House, once per statistic.
+    plot_chamber_stats('Age', 'chamber_age')
+    plot_chamber_stats('Career Length', 'chamber_career_length')
+    # Republican vs. Democrat, once per statistic.
+    plot_partisan_stats('Age', 'partisan_age')
+    plot_partisan_stats('Career Length', 'partisan_career_length')
+
 

+ 13 - 4
get_congress_data.py

@@ -1,3 +1,7 @@
+from collections import defaultdict
+career_lengths = defaultdict(int)
+
+
 def pull_congress(year):
     from requests import post
     from bs4 import BeautifulSoup
@@ -25,11 +29,14 @@ def pull_congress(year):
 
     rows = rep_table.find_all('tr')[1:]
     name = ""
+    url = ""
     yob = ""
     while rows:
         row = rows.pop(0).find_all('td')
         try:
             name = row[0].a.get_text()
+            url = row[0].a['href']
+            career_lengths[url] += 1
             yob = parse_dob(row[1].get_text())
         except AttributeError:
             pass
@@ -38,8 +45,9 @@ def pull_congress(year):
             continue
         party = row[3].get_text()
         state = row[4].get_text()
+        career_length = career_lengths[url]
 
-        yield (name, yob, position, party, state, year)
+        yield (name, yob, position, party, state, year, career_length)
 
 
 def download_all():
@@ -53,13 +61,14 @@ def download_all():
                          position TEXT,
                          party TEXT,
                          state TEXT,
-                         congress INTEGER);
+                         congress INTEGER,
+                         career_length INTEGER);
 ''')
-    for year in range(1789, 2017):
+    for year in range(1789, 2018):
         print(f'Downloading for year: {year}')
         for rep in pull_congress(year):
             if rep is not None:
-                conn.execute('INSERT INTO Member VALUES (?,?,?,?,?,?);', rep)
+                conn.execute('INSERT INTO Member VALUES (?,?,?,?,?,?,?);', rep)
     conn.commit()
     conn.close()
 

BIN
us_congress_members.sqlite3