Recent Entries 10
- pattern minor 112d agoPlot president's approval rating of president with lowest approval rating by day of presidencyI was curious which U.S. president had the lowest approval rating for each day in their presidency. For example, which president had the lowest approval rating on day 42, and what was the rating. I downloaded the data from here and built this code to visualize it. I'm particularly interested in feedback regarding anything inefficient or clumsy that I'm doing. I want the code to be clean and professional looking. This might be out of the scope of this site but any thoughts on how to visualize the data more effectively would be welcome as well. ``` # Here are the imports that we'll use import os import pandas as pd from datetime import datetime from collections import Counter import matplotlib.patches as mpatches import matplotlib.pyplot as plt from matplotlib import font_manager as fm ''' Here's the path to all the data. The data were copied from http://www.presidency.ucsb.edu/data/popularity.php and saved as tsv files. 
''' djt_path = os.getcwd() + '/data/djt.tsv' bho_path = os.getcwd() + '/data/bho.tsv' gwb_path = os.getcwd() + '/data/gwb.tsv' wjc_path = os.getcwd() + '/data/wjc.tsv' ghwb_path = os.getcwd() + '/data/ghwb.tsv' rwr_path = os.getcwd() + '/data/rwr.tsv' jec_path = os.getcwd() + '/data/jec.tsv' grf_path = os.getcwd() + '/data/grf.tsv' rmn_path = os.getcwd() + '/data/rmn.tsv' lbj_path = os.getcwd() + '/data/lbj.tsv' jfk_path = os.getcwd() + '/data/jfk.tsv' dde_path = os.getcwd() + '/data/dde.tsv' hst_path = os.getcwd() + '/data/hst.tsv' # Now let's read in all the data djt = pd.read_table(djt_path) bho = pd.read_table(bho_path) gwb = pd.read_table(gwb_path) wjc = pd.read_table(wjc_path) ghwb = pd.read_table(ghwb_path) rwr = pd.read_table(rwr_path) jec = pd.read_table(jec_path) grf = pd.read_table(grf_path) rmn = pd.read_table(rmn_path) lbj = pd.read_table(lbj_path) jfk = pd.read_table(jfk_path) dde = pd.read_table(dde_path) hst = pd.read_table(hst_path) # The first Gallup poll for this question was on 07/22/1941, which was in # FDR's third term, so
- pattern minor 112d agoReaching the philosophy wiki pageI've written a class that will start from a random Wikipedia page, then choose the first link in the main body, and then navigate following the links until it finds the Philosophy page. When I run the `testCrawler()` method, it crawls starting from 20 pages and then plots the lengths of all of the paths. This generally works but I just want to confirm that the code looks clean/intuitive. Points of concern: As there are a vast amount of edge cases, I have multiple try/except blocks. Does this look unwieldy? Also, the point of graphing the path lengths is to try to see what kind of distribution the path lengths form. If the graph looks like it's 'normal' can I assume normality? Or is there a better way to do this (an automated way)? ``` import requests from lxml.html import fromstring import json from bs4 import BeautifulSoup,NavigableString, Tag import sys import matplotlib.pyplot as plt import numpy as np reload(sys) sys.setdefaultencoding('utf-8') class Crawler(): ''' Class used to crawl wikipedia pages starting from a random article. ''' def __init__(self): self.baseUrl = "https://en.wikipedia.org" def reformatString(self,char,word): '''Remove passed in char from a string and convert its characters to lowercase ''' word = word.lower() charIdx = word.find(char) if charIdx != -1: return word[:charIdx] return word def checkNameMatch(self,heading,string): '''Determine whether or not any part of the article heading is in the string and vice versa ''' for i in range(len(string)): for j in range(len(heading)): if heading[j] in string[i] or string[i] in heading[j]: return True return False def tokenize(self, word): '''Split the passed in 'word' on space characters and return a list of tokens ''' tokens = [] currWord = "" for i in range(len(word)):
- pattern minor 112d agoCoordinates of 2D points zoomed in/outThis might be a simple issue that I am overcomplicating, but I've spent quite some time reading about polygon scaling and I've come to the conclusion that it is not precisely what I need. Given a set of `(x, y)` coordinates for `N` points (shown in blue) I need the new set of coordinates that result after zooming in/out a given scale factor (shown in red). I've come up with the simple method shown below, but I wonder if there might be another approach and/or more reasonable zooming methods. ``` import numpy as np import matplotlib.pyplot as plt import matplotlib.patches as patches N = 5 xy = [np.random.uniform(0., 1000., 2) for _ in range(N)] x, y = zip(*xy) # Center of xy points, defined as the center of the minimal rectangle that # contains all points. xy_center = ((min(x) + max(x)) * .5, (min(y) + max(y)) * .5) # Difference between the center coordinates and the xy points. delta_x, delta_y = xy_center[0] - x, xy_center[1] - y # Zoom scale (0. < scale) scale = 1.5 # Scaled xy points. x_scale = xy_center[0] - scale * delta_x y_scale = xy_center[1] - scale * delta_y ax = plt.subplot(111) # Original xy points. ax.scatter(x, y, c='b') # Defined center. ax.scatter(*xy_center, marker='x', c='g') # Zoomed points. ax.scatter(x_scale, y_scale, c='r') # Square: bottom left corner, width, height ax.add_patch( patches.Rectangle( (min(x), min(y)), (max(x) - min(x)), (max(y) - min(y)), fill=False)) plt.show() ```
- pattern minor 112d agoPlotting a rectangular prismI'm creating a rectangular prism function, whose output looks like this: I think that this code can be improved by optimizing the use of `np.meshgrid` with a Python iterator, but I can't wrap my head around it. It might also be possible to do this with fewer plotting calls, but I can't figure that out either. Ideally, I would change the line drawing to use a Line3DCollection and the areas to use a Patch3DCollection for plotting speed, but I'm still not comfortable enough with the 3D api. ``` from mpl_toolkits.mplot3d import Axes3D import matplotlib.pyplot as plt import numpy as np fig = plt.figure() ax = fig.gca(projection='3d') ax.set_aspect("equal") # draw cube def rect_prism(x_range, y_range, z_range): # TODO: refactor this to use an iterator xx, yy = np.meshgrid(x_range, y_range) ax.plot_wireframe(xx, yy, z_range[0], color="r") ax.plot_surface(xx, yy, z_range[0], color="r", alpha=0.2) ax.plot_wireframe(xx, yy, z_range[1], color="r") ax.plot_surface(xx, yy, z_range[1], color="r", alpha=0.2) yy, zz = np.meshgrid(y_range, z_range) ax.plot_wireframe(x_range[0], yy, zz, color="r") ax.plot_surface(x_range[0], yy, zz, color="r", alpha=0.2) ax.plot_wireframe(x_range[1], yy, zz, color="r") ax.plot_surface(x_range[1], yy, zz, color="r", alpha=0.2) xx, zz = np.meshgrid(x_range, z_range) ax.plot_wireframe(xx, y_range[0], zz, color="r") ax.plot_surface(xx, y_range[0], zz, color="r", alpha=0.2) ax.plot_wireframe(xx, y_range[1], zz, color="r") ax.plot_surface(xx, y_range[1], zz, color="r", alpha=0.2) rect_prism(np.array([-1, 1]), np.array([-1, 1]), np.array([-0.5, 0.5])) plt.show() ```
- pattern minor 112d agoReal time graph simulation of dice tossingWhen tossing a die many, many times, all numbers tend to appear the same number of times, but if the number of throws is small then some numbers may appear more or less often than others, even considerably so if the sample size is small enough. This program shows a graph, updating in real time, of `SIDES` bars (6 by default), each representing how many times a number has appeared so far. Please ignore that the numbers on the X axis are wrong; that is a minor bug-fix for later, and all of the visualization is there. My code feels a bit messy; please help with organization and simplification: ``` import random import numpy as np import matplotlib.pyplot as plt import matplotlib.animation as animation SIDES = 6 HEIGHT = 100 def update_results(results): die_toss = random.randint(0, SIDES - 1) return [x + (1 if i == die_toss else 0) for i, x in enumerate(results)] def plot_points(ps): plt.scatter(*zip(*ps)) def animate(frameno): global x x = update_results(x) n, _ = np.histogram(x, SIDES) n = x for rect, h in zip(patches, n): rect.set_height(h) print(x, max(x) / float(min(x))) return patches if __name__ == "__main__": x = [ 1 for _ in range(SIDES) ] fig, ax = plt.subplots() n, bins, patches = plt.hist(x, SIDES, normed=1, facecolor='green', alpha=0.75) frames = 500 ani = animation.FuncAnimation(fig, animate) axes = plt.gca() axes.set_ylim([0, HEIGHT]) plt.show() plt.show() for i in range(100): plt.bar(range(SIDES), x) x =update_results(x) print(x) plt.show() plt.clf() ```
- pattern minor 112d agoPlotting different parameterized polynomialsFor a university assignment I had to plot different polynomial functions depending on one single parameter. That parameter gave the number of supporting points to interpolate a given function in the domain \$\lbrack -1.0, 1.0 \rbrack \$. The supporting points were to be calculated in two different fashions. Either they were to be equidistant or be Chebyshev nodes. The given definitions were: $$ x_i = \frac{2i}{n} - 1 , \quad x_i = \cos \frac{(2i + 1)\pi}{2(n + 1)} $$ The plots are to be handed in as a PDF. The polynomial functions I had to calculate were given as: \$\Phi_n(x) = \underset{i \neq j}{\underset{i = 0}{\overset{n}{\Pi}}} (x - x_i) \$ and the slightly more complicated \$\lambda(x) = \underset{i = 0}{\overset{n}{\Sigma}} \lvert l_{i,n}(x) \rvert \$. Here \$l_{i,n}(x)\$ denotes a Lagrange polynomial. I'll just stop torturing you with math definitions (because I'm reasonably sure I'm able to copy a formula from a script into code). Note that \$\Phi_n\$ is called "Supporting point polynomial" and \$\lambda\$ is called "Lebesgue function" in the assignment. So without further ado, here's my code. Note that maintainability for future use is not a concern, so if you want you can mention docstrings and variable names, but those points don't really help me :) ``` import numpy as np import matplotlib.pyplot as plt def equidistant_points(count): points = [] for i in np.arange(0, count): points.append((2 * i / count) - 1) return points def tschebyscheff_points(count): points = [] for i in np.arange(0, count): points.append(np.cos(((2 * i + 1) * np.pi) / (2 * (count + 1)))) return points def as_supporting_point_poly(points, x): poly = 1 for point in points: poly = poly * (x - point) return poly def lagrange_poly(points, j, x): poly = 1 for i in range(0, len(points)): if (i != j): poly = poly * ((x - points[i]) / (points[j] - points[i])) return poly def lebesgu
- pattern minor 112d agoAbstract graphing-and-timing functionsI like to use `timeit` to measure performance of small, and sometimes big, functions. However, it can be uncertain what the 'actual' time to execute the function is. It also doesn't clearly show trends in function execution times. Originally I wanted to measure the difference between list comprehensions and using `append` continuously. I wanted it to be simple to use, and so tried to abstract the graphing as much as possible. This led to having to provide the components to `timeit` at class level, and a flattened list of graphs at function/instance level. My code still suffers from a couple of the above problems, but reduces them by a reasonable amount. The module is 'graphtimer.py': ``` from timeit import timeit from functools import partial CATEGORY10 = '#1f77b4 #ff7f0e #2ca02c #d62728 #9467bd #8c564b #e377c2 #7f7f7f #bcbd22 #17becf'.split() CATEGORY20 = '#1f77b4 #aec7e8 #ff7f0e #ffbb78 #2ca02c #98df8a #d62728 #ff9896 #9467bd #c5b0d5 #8c564b #c49c94 #e377c2 #f7b6d2 #7f7f7f #c7c7c7 #bcbd22 #dbdb8d #17becf #9edae5'.split() CATEGORY20b = '#393b79 #5254a3 #6b6ecf #9c9ede #637939 #8ca252 #b5cf6b #cedb9c #8c6d31 #bd9e39 #e7ba52 #e7cb94 #843c39 #ad494a #d6616b #e7969c #7b4173 #a55194 #ce6dbd #de9ed6'.split() CATEGORY20c = '#3182bd #6baed6 #9ecae1 #c6dbef #e6550d #fd8d3c #fdae6b #fdd0a2 #31a354 #74c476 #a1d99b #c7e9c0 #756bb1 #9e9ac8 #bcbddc #dadaeb #636363 #969696 #bdbdbd #d9d9d9'.split() def _time(setup, fn, amount=1000, number=1000000, command='fn(a)'): return timeit(command, setup.format(fn, amount), number=number) def time(*args, **kwargs): return partial(_time, *args, **kwargs) def flat(axes): if 'flat' in dir(axes): return axes.flat try: return [axis for row in axes for axis in row] except TypeError: return [axes] class GraphTimer: functions = [] inputs = [] domain = [] titles = [] colors = CATEGORY10 def _average_and_error_area(self, axis): for results in axis:
- pattern minor 112d agoPlotting from a Pandas dataframeI want to improve my code. Is it possible to get the plot without repeating the same instructions over multiple lines? The data comes from a Pandas dataframe, but I am only plotting the last column (Total Acc.). ``` e=df['Total Acc'].round(4)*100 #The repetitions start: row_awa = e.loc['AWA'] row_rem = e.loc['REM'] row_s1 = e.loc['S1'] row_s2 = e.loc['S2'] row_sws = e.loc['SWS'] row_stades = e.loc['stades'] #More repetitions row_awa.plot() row_rem.plot() row_s1.plot() row_s2.plot() row_sws.plot() row_stades.plot() myLegend=plt.legend(bbox_to_anchor=(0., 1.2, 1., .102), prop ={'size':10}, loc=10, ncol=4, #left, bottom, width, height title=r'TOTAL ACCURACY FOR MANY K-FOLD') #loc='center' myLegend.get_title().set_fontsize('20') ```
- pattern minor 112d agoMultigeneration evolution simulator, graphing phenotypic changeI created an evolution simulator. It takes random chance and applies it to phenotypes of species. This was very much for fun, and I would love any input on: - Readability of code - Efficiency of generation generating - Better ways to create dynamic GUI elements - Future mod-ability - My usage of classes, something I'm historically not great at - Any tips on how to improve how the program looks as well as behaves Also, please feel completely free to just run the program for fun! It is (I hope) cool to see how natural disasters will affect certain phenotypes in a population and how changing the chances of things like mutations and natural disasters affects the population as a whole! I had a lot of fun playing around with the different outcomes. A quick overview of the buttons in the GUI: - Quit: quits the program - Export profile: Saves all the current settings to a file that you can access later, using... - Load profile: loads a presaved .profile file - `NUM_ORG`: original number of organisms in the population - `OPT_OFF_NUM`: optimal number of offspring - `NAT_DIS_FREQ`: Frequency of natural disasters, use number between 0 and 100 - `GEN_FREQ`: how fast the generations reproduce, in seconds - `POP_LIM`: upper limit of the population (between 1000 and 9999) - `FREQ_MUT`: likelihood of a mutation occurring in an organism - `MAX_MUT`: maximum number of mutations in the population - `GEN_NUM`: Number of generations (it works pretty quickly, but results may vary) - `EXECUTE MAIN`: Runs the main function, generation a population list - `GRAPH`: generates the graph based off of the settings above the button - Checkboxes: allows you to control what is graphed. 
For example, unchecking the first box removes the "heat-resistant" organisms from the graph - Show Natural Disaster Lines?: Draws a line straight down from a natural disaster to show what generation it occurred at Resultant graph: Program in action: ``` #--------------------------------------
- pattern minor 112d agoPlot heat map from csv file using numpy and matplotlibThere's a csv file with format: ``` x0, y0, v00 x0, y1, v01 ... x1, y0, v10 ... ``` And what I want to do is to plot a heat map, in which at location (x, y) the value v is plotted with the corresponding color. Below is my current implementation. ``` import random import numpy as np import matplotlib.pyplot as plt def create_test_csv(file): random.seed(42) f = open(file, "w") for x in range(300): for y in range(600): value = random.randrange(255) f.write(str(x) + "," + str(y) + "," + str(value) + "\n") def get_xyz_from_csv_file(csv_file_path): ''' get x, y, z value from csv file csv file format: x0,y0,z0 ''' x = [] y = [] z = [] map_value = {} for line in open(csv_file_path): list = line.split(",") temp_x = float(list[0]) temp_y = float(list[1]) temp_z = float(list[2]) x.append(temp_x) y.append(temp_y) z.append(temp_z) map_value[(temp_x, temp_y)] = temp_z return x, y, map_value def draw_heatmap(x, y, map_value): plt_x = np.asarray(list(set(x))) plt_y = np.asarray(list(set(y))) plt_z = np.zeros(shape = (len(plt_x), len(plt_y))) for i in range(len(plt_x)): for j in range(len(plt_y)): if map_value.has_key((plt_x.item(i), plt_y.item(j))): plt_z[i][j] = map_value[(plt_x.item(i), plt_y.item(j))] z_min = plt_z.min() z_max = plt_z.max() plt_z = np.transpose(plt_z) plot_name = "demo" color_map = plt.cm.gist_heat #plt.cm.rainbow #plt.cm.hot #plt.cm.gist_heat plt.clf() plt.pcolor(plt_x, plt_y, plt_z, cmap=color_map, vmin=z_min, vmax=z_max) plt.axis([plt_x.min(), plt_x.max(), plt_y.min(), plt_y.max()]) plt.title(plot_name) plt.colorbar().set_label(plot_name, rotation=270) ax = plt.gca() ax.set_aspect('equal') figure = plt.gcf() plt.show() return figure if __name__ == "__main__": csv_file_nam