Skip to content

Commit 90d88d0

Browse files
committed
rename gitlab_stat to gitlab_user_contributions
1 parent e1b6ec8 commit 90d88d0

File tree

1 file changed

+322
-0
lines changed

1 file changed

+322
-0
lines changed

gitlab_user_contributions.py

Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
import configparser
2+
import requests
3+
from collections import defaultdict
4+
import csv
5+
import concurrent.futures
6+
import threading
7+
import logging
8+
9+
# 配置日志记录
10+
# Root logger at INFO level; each record is rendered as
# "YYYY-MM-DD HH:MM:SS - LEVEL - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
11+
12+
# 读取配置文件
13+
def read_config(config_file):
    """Load the GitLab connection and run settings from an INI file.

    The ``[gitlab]`` section must provide ``url``, ``token``, ``start_date``,
    ``end_date`` and ``csv_file``. The ``[performance]`` section is optional;
    ``worker_threads`` falls back to 10 when it is absent.

    Args:
        config_file: Path of the INI file; it is read as UTF-8.

    Returns:
        dict: ``url`` (trailing slashes stripped), ``token``, ``start_date``,
        ``end_date`` and ``csv_file`` as strings, plus ``worker_threads`` as
        an int.

    Raises:
        SystemExit: With status 1 (after logging the cause) when the file
            cannot be parsed, a required section/option is missing, or
            ``worker_threads`` is not an integer.
    """
    config = configparser.ConfigParser()
    try:
        # read() silently skips a file it cannot open; that case surfaces
        # below as NoSectionError when the first option is requested.
        config.read(config_file, encoding="utf-8")
        return {
            "url": config.get("gitlab", "url").rstrip('/'),  # drop trailing slashes
            "token": config.get("gitlab", "token"),
            "start_date": config.get("gitlab", "start_date"),
            "end_date": config.get("gitlab", "end_date"),
            "csv_file": config.get("gitlab", "csv_file"),
            "worker_threads": config.getint("performance", "worker_threads", fallback=10),
        }
    except (configparser.Error, ValueError) as e:
        # configparser.Error covers missing sections/options as well as
        # parsing and interpolation failures; ValueError covers a
        # non-integer worker_threads value.
        logging.error(f"配置错误: {e}")
        # Raise SystemExit directly rather than calling the exit() helper,
        # which is only injected by the site module.
        raise SystemExit(1) from e
28+
29+
# 获取所有项目
30+
def get_all_projects(url, token, *, timeout=30):
    """Collect every item returned by the ``/projects`` endpoint.

    Pages of 100 items are requested, starting at page 1, until an empty
    page comes back.

    Args:
        url: API base URL; ``/projects`` is appended to it.
        token: Value sent in the ``PRIVATE-TOKEN`` request header.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection fails instead of blocking forever.

    Returns:
        list: Decoded JSON items of all pages fetched. When a request fails,
        the error is logged and whatever was collected so far is returned.
    """
    api_url = f"{url}/projects"
    projects = []
    page = 1
    while True:
        try:
            response = requests.get(
                api_url,
                headers={"PRIVATE-TOKEN": token},
                params={"per_page": 100, "page": page},
                timeout=timeout,
            )
            response.raise_for_status()  # turn HTTP error statuses into exceptions
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable body on requests versions
            # whose JSON decode error is not a RequestException.
            logging.error(f"获取项目时出错: {e}")
            break
        if not data:
            break  # an empty page marks the end of the listing
        projects.extend(data)
        page += 1
    logging.info(f"获取所有项目完成，共 {len(projects)} 个项目")
    return projects
52+
53+
# 获取指定时间范围内的提交记录
54+
def get_commits(project_id, url, token, start_date, end_date, *, timeout=30):
    """Fetch a project's commits within a date window, page by page.

    Args:
        project_id: Project identifier interpolated into the endpoint path.
        url: API base URL.
        token: Value sent in the ``PRIVATE-TOKEN`` request header.
        start_date: Sent as the ``since`` query parameter.
        end_date: Sent as the ``until`` query parameter.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection fails instead of blocking forever.

    Returns:
        list: Decoded JSON commit items of all pages fetched. When a request
        fails, the error is logged and the commits collected so far are
        returned.
    """
    api_url = f"{url}/projects/{project_id}/repository/commits"
    commits = []
    page = 1
    while True:
        try:
            response = requests.get(
                api_url,
                headers={"PRIVATE-TOKEN": token},
                params={
                    "since": start_date,
                    "until": end_date,
                    "per_page": 100,
                    "page": page,
                },
                timeout=timeout,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable body on requests versions
            # whose JSON decode error is not a RequestException.
            logging.error(f"获取项目 {project_id} 的提交记录时出错: {e}")
            break
        logging.info(f"项目 {project_id} 获取到 {len(data)} 个提交，当前页: {page}")
        if not data:
            break  # an empty page marks the end of the listing
        commits.extend(data)
        page += 1
    return commits
81+
82+
# 获取提交的详细信息(包括代码行变化)
83+
def get_commit_details(project_id, commit_id, url, token, *, timeout=30):
    """Return the line-change statistics of a single commit.

    Args:
        project_id: Project identifier interpolated into the endpoint path.
        commit_id: Commit identifier interpolated into the endpoint path.
        url: API base URL.
        token: Value sent in the ``PRIVATE-TOKEN`` request header.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection fails instead of blocking forever.

    Returns:
        dict: The ``stats`` object of the response. If the request fails
        (the error is logged) or the response carries no usable ``stats``,
        ``{"additions": 0, "deletions": 0}`` is returned instead.
    """
    api_url = f"{url}/projects/{project_id}/repository/commits/{commit_id}"
    no_changes = {"additions": 0, "deletions": 0}
    try:
        response = requests.get(
            api_url,
            headers={"PRIVATE-TOKEN": token},
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an undecodable body on requests versions whose
        # JSON decode error is not a RequestException.
        logging.error(f"获取项目 {project_id} 的提交详情时出错: {e}")
        return no_changes
    # "or" also covers a present-but-null "stats" value, so callers always
    # receive a dict they can call .get() on.
    return data.get("stats") or no_changes
96+
97+
# 获取指定时间范围内的合并请求
98+
def get_merge_requests(project_id, url, token, start_date, end_date, *, timeout=30):
    """Fetch a project's merge requests created within a date window.

    Args:
        project_id: Project identifier interpolated into the endpoint path.
        url: API base URL.
        token: Value sent in the ``PRIVATE-TOKEN`` request header.
        start_date: Sent as the ``created_after`` query parameter.
        end_date: Sent as the ``created_before`` query parameter.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection fails instead of blocking forever.

    Returns:
        list: Decoded JSON merge-request items of all pages fetched. When a
        request fails, the error is logged and the items collected so far
        are returned.
    """
    api_url = f"{url}/projects/{project_id}/merge_requests"
    merge_requests = []
    page = 1
    while True:
        try:
            response = requests.get(
                api_url,
                headers={"PRIVATE-TOKEN": token},
                params={
                    "created_after": start_date,
                    "created_before": end_date,
                    "per_page": 100,
                    "page": page,
                },
                timeout=timeout,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable body on requests versions
            # whose JSON decode error is not a RequestException.
            logging.error(f"获取项目 {project_id} 的合并请求时出错: {e}")
            break
        logging.info(f"项目 {project_id} 获取到 {len(data)} 个合并请求，当前页: {page}")
        if not data:
            break  # an empty page marks the end of the listing
        merge_requests.extend(data)
        page += 1
    return merge_requests
125+
126+
# 获取用户信息 by email
127+
def get_user_by_email(email, url, token, *, timeout=30):
    """Look up a user by passing an e-mail address as the ``search`` term.

    Args:
        email: Address sent as the ``search`` query parameter of ``/users``.
        url: API base URL.
        token: Value sent in the ``PRIVATE-TOKEN`` request header.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection fails instead of blocking forever.

    Returns:
        The first item of the response list, or ``None`` when the list is
        empty or the request fails (the error is logged).
    """
    api_url = f"{url}/users"
    try:
        response = requests.get(
            api_url,
            headers={"PRIVATE-TOKEN": token},
            params={"search": email},
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an undecodable body on requests versions whose
        # JSON decode error is not a RequestException.
        logging.error(f"获取用户信息时出错: {e}")
        return None
    # Assumes the first search hit is the intended user.
    return data[0] if data else None
143+
144+
# 获取用户信息 by username
145+
def get_user_by_username(username, url, token, *, timeout=30):
    """Look up a user through the ``username`` filter of ``/users``.

    Args:
        username: Value sent as the ``username`` query parameter.
        url: API base URL.
        token: Value sent in the ``PRIVATE-TOKEN`` request header.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection fails instead of blocking forever.

    Returns:
        The first item of the response list, or ``None`` when the list is
        empty or the request fails (the error is logged).
    """
    api_url = f"{url}/users"
    try:
        response = requests.get(
            api_url,
            headers={"PRIVATE-TOKEN": token},
            params={"username": username},
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an undecodable body on requests versions whose
        # JSON decode error is not a RequestException.
        logging.error(f"获取用户信息时出错: {e}")
        return None
    # Assumes the first returned item is the intended user.
    return data[0] if data else None
161+
162+
# 建立提交者映射表
163+
def build_contributor_mapping(url, token, *, timeout=30):
    """Build a lookup table from e-mail and from username to user identity.

    Every page of ``/users`` (100 items per page) is walked until an empty
    page comes back. Each user is registered under its ``email`` and under
    its ``username`` whenever those fields are non-empty.

    Args:
        url: API base URL.
        token: Value sent in the ``PRIVATE-TOKEN`` request header.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection fails instead of blocking forever.

    Returns:
        dict: Maps e-mail / username strings to
        ``{"id": ..., "username": ..., "fullname": ...}``. When a request
        fails, the error is logged and the partial mapping is returned.
    """
    mapping = {}
    page = 1
    while True:
        try:
            response = requests.get(
                f"{url}/users",
                headers={"PRIVATE-TOKEN": token},
                params={"per_page": 100, "page": page},
                timeout=timeout,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable body on requests versions
            # whose JSON decode error is not a RequestException.
            logging.error(f"建立提交者映射表时出错: {e}")
            break
        if not data:
            break  # an empty page marks the end of the listing
        for user in data:
            email = user.get("email")
            username = user.get("username")
            info = {
                "id": user.get("id"),
                "username": username,
                "fullname": user.get("name", "Unknown"),
            }
            if email:
                mapping[email] = info
            if username:
                # Separate dict object per key, as before, so mutating one
                # entry can never leak into the other.
                mapping[username] = info.copy()
        page += 1
    # A user can occupy two keys (email and username), so count distinct ids
    # rather than len(mapping) to report an accurate number of users.
    user_count = len({entry["id"] for entry in mapping.values()})
    logging.info(f"建立提交者映射表完成，共 {user_count} 个用户")
    return mapping
192+
193+
# 处理单个项目的数据
194+
def _author_entry(stats, author_id, username, fullname):
    """Return the stats record for ``author_id``, creating a zeroed one if absent.

    The caller must hold the lock that guards ``stats``.
    """
    if author_id not in stats:
        stats[author_id] = {
            "username": username,
            "fullname": fullname,
            "commit_count": 0,
            "merge_request_count": 0,
            "additions": 0,
            "deletions": 0,
            "projects": set(),
        }
    return stats[author_id]


def process_project(project, url, token, start_date, end_date, stats, lock, contributor_mapping):
    """Accumulate one project's commit and merge-request counts into ``stats``.

    Commits are attributed by looking up ``author_email`` and then
    ``author_name`` in ``contributor_mapping``; commits that match neither
    are logged and skipped. Merge requests are attributed through their
    ``author`` object.

    Args:
        project: Project item; ``id`` and ``path_with_namespace`` are read.
        url: API base URL.
        token: Value sent in the ``PRIVATE-TOKEN`` request header.
        start_date: Start of the reporting window, forwarded to the fetchers.
        end_date: End of the reporting window, forwarded to the fetchers.
        stats: Shared dict keyed by user id; updated in place.
        lock: Lock held around every update of ``stats``.
        contributor_mapping: Dict mapping e-mail / username to
            ``{"id", "username", "fullname"}`` records.

    Returns:
        None. Results are written into ``stats``.
    """
    project_id = project["id"]
    project_name = project["path_with_namespace"]
    logging.info(f"处理项目: {project_name} (ID: {project_id})")

    # --- commits ---
    commits = get_commits(project_id, url, token, start_date, end_date)
    logging.info(f"项目 {project_name} 获取到 {len(commits)} 个提交")
    for commit in commits:
        author_email = commit.get("author_email")
        author_name = commit.get("author_name")

        if not author_email and not author_name:
            logging.warning(f"提交 {commit['id']} 没有作者邮箱和作者名称")
            continue

        # Prefer the e-mail match; fall back to the author name.
        user_info = contributor_mapping.get(author_email) if author_email else None
        if not user_info:
            user_info = contributor_mapping.get(author_name) if author_name else None

        if not user_info:
            logging.warning(f"提交 {commit['id']} 无法找到对应的用户信息")
            continue

        # Fetch outside the lock so other workers are not blocked on the
        # network round trip.
        commit_stats = get_commit_details(project_id, commit["id"], url, token)

        with lock:
            entry = _author_entry(
                stats,
                user_info.get("id"),
                user_info.get("username", "Unknown"),
                user_info.get("fullname", "Unknown"),
            )
            entry["commit_count"] += 1
            entry["additions"] += commit_stats.get("additions", 0)
            entry["deletions"] += commit_stats.get("deletions", 0)
            entry["projects"].add(project_id)

    # --- merge requests ---
    merge_requests = get_merge_requests(project_id, url, token, start_date, end_date)
    logging.info(f"项目 {project_name} 获取到 {len(merge_requests)} 个合并请求")
    for mr in merge_requests:
        # "or {}" also covers an "author" key that is present but null.
        author = mr.get("author") or {}
        author_id = author.get("id")
        author_username = author.get("username", "Unknown")
        author_fullname = author.get("name", "Unknown")

        if not author_id or not author_username:
            logging.warning(f"合并请求 {mr.get('id')} 无法找到对应的用户信息")
            continue

        with lock:
            entry = _author_entry(stats, author_id, author_username, author_fullname)
            entry["merge_request_count"] += 1
            entry["projects"].add(project_id)
268+
269+
# 主逻辑
270+
def main():
    """Run the whole report.

    Reads ``config.ini``, fetches all projects and the contributor lookup
    table, processes the projects on a thread pool, and writes one CSV row
    per user, ordered by commit count (highest first).
    """
    settings = read_config("config.ini")
    base_url, private_token = settings["url"], settings["token"]
    window_start, window_end = settings["start_date"], settings["end_date"]
    csv_file = settings["csv_file"]
    pool_size = settings["worker_threads"]

    logging.info("获取所有项目...")
    all_projects = get_all_projects(base_url, private_token)

    logging.info("建立提交者映射表...")
    lookup = build_contributor_mapping(base_url, private_token)

    # Shared accumulator plus the lock the workers use to guard it.
    totals = defaultdict(dict)
    guard = threading.Lock()

    with concurrent.futures.ThreadPoolExecutor(max_workers=pool_size) as pool:
        pending = []
        for item in all_projects:
            pending.append(
                pool.submit(
                    process_project,
                    item,
                    base_url,
                    private_token,
                    window_start,
                    window_end,
                    totals,
                    guard,
                    lookup,
                )
            )
        for finished in concurrent.futures.as_completed(pending):
            try:
                finished.result()
            except Exception as e:
                # One failing project must not abort the whole report.
                logging.error(f"处理项目时出错: {e}")

    header = [
        "GitLab用户ID", "用户名", "全名", "提交次数", "合并请求次数", "增加代码行数", "删除代码行数", "参与项目数"
    ]
    with open(csv_file, mode="w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(header)
        ranked = sorted(
            totals.items(),
            key=lambda pair: pair[1]["commit_count"],
            reverse=True,
        )
        out.writerows(
            [
                user_id,
                record["username"],
                record["fullname"],
                record["commit_count"],
                record["merge_request_count"],
                record["additions"],
                record["deletions"],
                len(record["projects"]),  # number of distinct projects touched
            ]
            for user_id, record in ranked
        )

    logging.info(f"结果已保存到 {csv_file}")
320+
321+
# Script entry point: run the report only when the file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)