Downloads last month:
{{ "{:,.0f}".format(recent['month']) }}
+
+ 7-day smoothing
{% endblock %}
diff --git a/pypistats/views/general.py b/pypistats/views/general.py
index def523f..81c3b21 100644
--- a/pypistats/views/general.py
+++ b/pypistats/views/general.py
@@ -107,7 +107,7 @@ def package_page(package):
recent[r.category] = r.downloads
# PyPI metadata
- metadata = None
+ metadata = dict()
if package != "__all__":
try:
metadata = requests.get(f"https://pypi.python.org/pypi/{package}/json", timeout=5).json()
@@ -139,8 +139,13 @@ def package_page(package):
else:
metrics = ["downloads", "percentages"]
+ use_smoothing = metadata['use_smoothing'] = request.args.get('smooth', None) is not None
for metric in metrics:
- model_data.append({"metric": metric, "name": model.__tablename__, "data": data_function[metric](records)})
+ model_data.append({
+ "metric": metric,
+ "name": model.__tablename__,
+ "data": data_function[metric](records, use_smoothing=use_smoothing),
+ })
# Build the plots
plots = []
@@ -191,7 +196,20 @@ def package_page(package):
return render_template("package.html", package=package, plots=plots, metadata=metadata, recent=recent, user=g.user)
-def get_download_data(records):
+def smooth_data(data, window=7):
+    """Return a date-sorted copy of ``data`` with ``"y"`` replaced by a trailing mean.
+
+    ``data`` is a mapping holding parallel ``"x"`` (dates) and ``"y"`` (numeric)
+    sequences. Each smoothed point is the mean of up to ``window`` values ending
+    at, and including, that point, so the most recent day always contributes to
+    the most recent smoothed value. The first ``window - 1`` points average over
+    the shorter history that is available. The input mapping is not modified,
+    and an empty series yields empty ``"x"``/``"y"`` lists.
+
+    Raises ``ValueError`` if ``window`` is smaller than 1.
+    """
+    if window < 1:
+        raise ValueError("window must be at least 1")
+    # Ensure data is sorted by date; the key only looks at x, so y values are
+    # never compared with each other.
+    pairs = sorted(zip(data["x"], data["y"]), key=lambda pair: pair[0])
+    ys = [y for _, y in pairs]
+    # Work on a copy so the caller's lists are left untouched.
+    smoothed_data = deepcopy(data)
+    smoothed_data["x"] = [x for x, _ in pairs]
+    smoothed_data["y"] = []
+    for end in range(1, len(ys) + 1):
+        # Trailing window: looks backwards only and includes the current day.
+        window_data = ys[max(0, end - window):end]
+        smoothed_data["y"].append(sum(window_data) / len(window_data))
+    return smoothed_data
+
+
+def get_download_data(records, use_smoothing=False):
"""Organize the data for the absolute plots."""
data = defaultdict(lambda: {"x": [], "y": []})
@@ -241,54 +259,33 @@ def get_download_data(records):
if category not in date_categories:
data[category]["x"].append(str(records[-1].date))
data[category]["y"].append(0)
- return data
-
-
-def get_proportion_data(records):
- """Organize the data for the fill plots."""
- data = defaultdict(lambda: {"x": [], "y": [], "text": []})
-
- date_categories = defaultdict(lambda: 0)
- all_categories = []
-
- prev_date = records[0].date
-
- for record in records:
- if record.category not in all_categories:
- all_categories.append(record.category)
-
- all_categories = sorted(all_categories)
- for category in all_categories:
- data[category] # set the dict value (keeps it ordered)
- for record in records:
- if record.date != prev_date:
+ if use_smoothing:
+ # Smooth data using a 7-day window
+ for category in all_categories:
+ data[category] = smooth_data(data[category])
- total = sum(date_categories.values()) / 100
- for category in all_categories:
- data[category]["x"].append(str(prev_date))
- value = date_categories[category] / total
- data[category]["y"].append(value)
- data[category]["text"].append("{0:.2f}%".format(value) + " = {:,}".format(date_categories[category]))
+ return data
- date_categories = defaultdict(lambda: 0)
- prev_date = record.date
- # Track categories for this date
- date_categories[record.category] = record.downloads
- else:
- # Fill in missing final date with zeros
- total = sum(date_categories.values()) / 100
- for category in all_categories:
- if category not in date_categories:
- data[category]["x"].append(str(records[-1].date))
- data[category]["y"].append(0)
- data[category]["text"].append("{0:.2f}%".format(0) + " = {:,}".format(0))
- else:
- data[category]["x"].append(str(records[-1].date))
- value = date_categories[category] / total
- data[category]["y"].append(value)
- data[category]["text"].append("{0:.2f}%".format(value) + " = {:,}".format(date_categories[category]))
+def get_proportion_data(records, use_smoothing=False):
+    """Organize the data for the fill plots.
+
+    Returns the mapping produced by ``get_download_data`` with each category's
+    ``"y"`` values converted to that category's percentage share (0-100) of the
+    per-date total, plus a ``"text"`` list of labels of the form
+    ``"<percentage>% = <downloads>"``.
+    """
+    # Get the absolute numbers as a starting point, to handle fills etc.
+    # Note that this means we smooth *then* calculate proportions, which
+    # is the correct order to avoid inflating random noise.
+    data = get_download_data(records, use_smoothing=use_smoothing)
+
+    # Calculate the per-day sum of all y-values to divide by. A day whose
+    # total is zero gets a divisor of 1, so it yields 0% instead of raising
+    # ZeroDivisionError.
+    all_ys = [category_values["y"] for category_values in data.values()]
+    totals = [sum(chunk) or 1 for chunk in zip(*all_ys)]
+
+    # Finally convert each category to percentages and add detailed labels.
+    for category_values in data.values():
+        absolutes = category_values["y"]
+        percentages = [100 * y / t for y, t in zip(absolutes, totals)]
+        category_values["y"] = percentages
+        # Label order: percentage share first, then the absolute value.
+        category_values["text"] = [
+            "{:.2f}% = {:,}".format(p, a) for p, a in zip(percentages, absolutes)
+        ]
+    return data