Render the contributors statistics page.
Source code in ckanext/nhm/routes/statistics.py
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171 | @blueprint.route('/contributors')
def contributors():
    """
    Render the contributors statistics page.

    Builds a cumulative "new authors over time" series and exposes it to the template:

    1. Solr is queried directly with an ``id,author`` facet pivot over public, active
       packages to get the authors of each package.
    2. Each package is credited only with authors not already seen on a previously
       processed package, so an author of several packages is counted once.
    3. Public, active packages are read from the database ordered by
       ``metadata_created`` and the per-package counts are summed per creation day.
    4. A running total per day is stored as ``[date, total]`` pairs, JSON-encoded,
       zlib-compressed, base64-encoded and assigned to ``toolkit.c.graph_data``.

    :returns: the result of rendering ``stats/contributors.html``
    """
    graph_data = []

    # we use solr to get the number of authors for the front page statistics so we'll use it again
    # here to get a per-package authors count. We have to use solr directly to do this because the
    # package_search action doesn't allow the pivot options to be passed through
    solr = make_connection()
    results = (
        solr.search(
            '*:*',
            **{
                'fq': '+capacity:public +state:active',
                'facet': 'true',
                'facet.pivot': 'id,author',
                'facet.pivot.mincount': 1,
                'facet.limit': -1,
            },
        )
        .facets.get('facet_pivot', {})
        .get('id,author', [])
    )

    # turn the counts into a lookup from package_id -> number of authors. Note that the number of
    # authors only includes authors we haven't seen before to avoid counting authors of multiple
    # packages more than once
    counts = {}
    seen_authors = set()
    for hit in results:
        package_id = hit['value']
        package_authors = {author['value'] for author in hit.get('pivot', [])}
        # figure out which authors have not been counted yet
        unseen_authors = package_authors.difference(seen_authors)
        counts[package_id] = len(unseen_authors)
        seen_authors.update(unseen_authors)

    # retrieve the packages in the database ordered by creation time. We need this because we can't
    # order the solr facets by created date
    order = list(
        model.Session.query(model.Package.id, model.Package.metadata_created)
        .filter(model.Package.private == false())
        .filter(model.Package.state == model.State.ACTIVE)
        .order_by(model.Package.metadata_created)
    )

    # only do stuff if we have some packages
    if order:
        # always segment by day (further grouping can be done by d3)
        extraction_format = '%Y-%m-%d'

        # sum the counts by package creation time based on the extraction format we chose;
        # insertion order follows the creation-time ordering of the query above
        grouped_ordered_data = OrderedDict()
        for package_id, created in order:
            # just in case the database isn't up to date with the solr index
            if package_id not in counts:
                continue
            date_group = created.strftime(extraction_format)
            grouped_ordered_data[date_group] = (
                grouped_ordered_data.get(date_group, 0) + counts[package_id]
            )

        # run through the data, adding up a total as we go and adding the data to the graph
        total = 0
        for formatted_date, count in grouped_ordered_data.items():
            total += count
            graph_data.append([formatted_date, total])

    # compress the data; done unconditionally so graph_data is always set (as an encoded
    # empty list when there are no packages)
    toolkit.c.graph_data = base64.b64encode(
        zlib.compress(json.dumps(graph_data).encode(), level=9)
    )

    return toolkit.render(
        'stats/contributors.html', {'title': 'Contributor statistics'}
    )
|