gitlab按日期统计每个人提交代码量

今天接到老板命令,需要统计每个人每月的代码提交量,纳入每月的绩效考核(神奇的需求)。虽然可以通过查看 GitLab 上每个人的提交记录来统计,但是费时费力,所以这里通过脚本的方式来统计。

方法1:git命令行

git log --since=2024-05-01 --until=2024-05-28 --format='%aN' | sort -u | while read name; do echo -en "$name \t"; git log --author="$name" --pretty=tformat: --numstat | awk '{ add += $1; subs += $2; loc += $1 - $2 } END { printf "added lines: %s, removed lines: %s, total lines: %s\n", add, subs, loc }' -; done

方法2:gitlab api

apt install -y python3-dateutil
vi git-tj.py
#!/usr/bin/python3
# coding=utf8
# @Author: Mario
# @Date  : 2020/11/04
# @Desc  : GitLab 按时间查看各用户代码提交量官方API版
#token1 rw: KJAtyx6RJTFLsUrAWmZ-
#token2 ro: JasKPQyVR9q2zrxmCKG3
#import pandas as pd
import mysql.connector
import json,datetime
import requests
import argparse
from dateutil.parser import parse
gitlab_url = "https://git.kailinesb.com/"  # GitLab 地址
private_token = "JasKPQyVR9q2zrxmCKG3"  # GitLab Access Tokens(管理员权限)
info = []
headers = {
'Connection': 'close',
}
# UTC时间转时间戳
def utc_time(time):
dt = parse(time)
return int(dt.timestamp())
# 输出格式化
def str_format(txt):
lenTxt = len(txt)
lenTxt_utf8 = len(txt.encode('utf-8'))
size = int((lenTxt_utf8 - lenTxt) / 2 + lenTxt)
length = 20 - size
return length
# 获取 GitLab 上的所有项目ID和名称
def gitlab_projects_id_name():
project_id_name = []
#project_list = ["ng-kboss2","front","ops2","wechat-card"]
page = 1
#mysql插入存在则更新,不存在就插入
sql = "INSERT INTO project_id_name (project_id, project_name ) VALUES (%s, %s) ON DUPLICATE KEY UPDATE project_name=VALUES(project_name)"
while True:
url = gitlab_url + "api/v4/projects/?private_token=" + private_token + "&page=" + str(page) + "&per_page=20"
while True:
try:
res = requests.get(url, headers=headers, timeout=10)
break
except Exception as e:
print(e)
continue
projects = json.loads(res.text)
if len(projects) == 0:
break
else:
for project in projects:
res2="project_id:"+str(project["id"])+",project_name:"+project["name"]
project_id = project["id"]
project_name= project["name"]
data = (project_id,project_name)
project_id_name.append(res2)
print(res2)
to_mysql(sql,data)
page += 1
#return project_id_name
# 获取 GitLab 上的所有项目
def gitlab_projects(project_group):
project_ids = []
project_all = []
#project_list = ["ng-kboss2","front","ops2","wechat-card"]
page = 1
while True:
url = gitlab_url + "api/v4/projects/?private_token=" + private_token + "&page=" + str(page) + "&per_page=20"
while True:
try:
res = requests.get(url, headers=headers, timeout=10)
break
except Exception as e:
print(e)
continue
projects = json.loads(res.text)
if len(projects) == 0:
break
else:
for project in projects:
project_all.append(project["id"])
if project["namespace"]["name"] == project_group and not project["archived"]:
project_ids.append(project["id"])
page += 1
print("Gitlab总共 " + str(len(project_all)) + " 个项目,在 " + project_group + " 组下,共获取到 " + str(len(project_ids)) + " 个")
return project_ids
def gitlab_projects_id(project_id):
projects_ids = []
projects_ids.append(project_id)
print("处理项目id " + str(project_id) )
return projects_ids
# 获取 GitLab 上的项目 id 中的分支
def project_branches(project_id):
branch_names = ["master"]
return branch_names
page = 1
while True:
url = gitlab_url + "api/v4/projects/" + str(
project_id) + "/repository/branches?private_token=" + private_token + "&page=" + str(page) + "&per_page=20"
while True:
try:
res = requests.get(url, headers=headers, timeout=10)
break
except Exception as e:
print(e)
continue
branches = json.loads(res.text)
#'''Debug
print(url)
print('--' * 10)
print(branches)
print('*' * 10)
#'''
if len(branches) == 0:
break
else:
for branch in branches:
branch_names.append(branch["name"])
page += 1
return branch_names
# 获取 GitLab 上的项目分支中的 commits,当 title 或 message 首单词为 Merge 时,表示合并操作,剔除此代码量
def project_commits(project_id, branch, start_time, end_time):
commit_ids = []
page = 1
while True:
url = gitlab_url + "api/v4/projects/" + str(
project_id) + "/repository/commits?ref_name=" + branch + "&private_token=" + private_token + "&page=" + str(
page) + "&per_page=20"
while True:
try:
res = requests.get(url, headers=headers, timeout=10)
break
except Exception as e:
print(e)
continue
commits = json.loads(res.text)
if len(commits) == 0:
break
else:
for commit in commits:
if "Merge" in commit["title"] or "Merge" in commit["message"] or "合并" in commit["title"] or "合并" in \
commit["message"]:  # 不统计合并操作
continue
elif utc_time(commit["authored_date"]) < utc_time(start_time) or utc_time(
commit["authored_date"]) > utc_time(end_time):  # 不满足时间区间
continue
else:
commit_ids.append(commit["id"])
page += 1
return commit_ids
# 根据 commits 的 id 获取代码量
def commit_code(project_id, commit_id):
global info
url = gitlab_url + "api/v4/projects/" + str(
project_id) + "/repository/commits/" + commit_id + "?private_token=" + private_token
while True:
try:
res = requests.get(url, headers=headers, timeout=10)
break
except Exception as e:
print(e)
continue
data = json.loads(res.text)
temp = {"name": data["author_name"], "additions": data["stats"]["additions"],
"deletions": data["stats"]["deletions"], "total": data["stats"]["total"]}  # Git工具用户名,新增代码数,删除代码数,总计代码数
info.append(temp)
# GitLab 数据查询
def gitlab_info(start_time, end_time, project_group):
for project_id in gitlab_projects(project_group):  # 遍历所有项目ID
for branche_name in project_branches(project_id):  # 遍历每个项目中的分支
for commit_id in project_commits(project_id, branche_name, start_time, end_time):  # 遍历每个分支中的 commit id
commit_code(project_id, commit_id)  # 获取代码提交量
def gitlab_info_id(start_time, end_time, project_id):
for project_id in gitlab_projects_id(project_id):  # 遍历所有项目ID
for branche_name in project_branches(project_id):  # 遍历每个项目中的分支
for commit_id in project_commits(project_id, branche_name, start_time, end_time):  # 遍历每个分支中的 commit id
commit_code(project_id, commit_id)  # 获取代码提交量
def to_mysql(sql,data):
# 连接到 MySQL 数据库
mydb = mysql.connector.connect(
host="192.168.21.195",
user="root",
port="13306",
password="S8XbVyllyl",
database="gitlab_tj",
ssl_disabled='True'
)
# 创建游标对象
mycursor = mydb.cursor()
# 准备要插入的数据
#data = ("John", "Doe", "john.doe@example.com")
# 执行插入语句
#print(type(data))
#sql = "INSERT INTO code_lines (name, project_id, additions, deletions, total, start_time, end_time, update_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
mycursor.execute(sql, data)
#sql="select * from code_lines"
#mycursor.execute(sql)
#for row in mycursor:
#   print(row)
# 提交更改
mydb.commit()
# 打印插入的行数
print("save to mysql",mycursor.rowcount, "record inserted.")
# 关闭连接
mydb.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--start", required=True, help="开始时间,如2024-05-26")
parser.add_argument("--end", required=True, help="结束时间,如2024-05-29")
parser.add_argument("--group", help="gitlab组名称")
parser.add_argument("--id", type=int, help="gitlab项目ID")
parser.add_argument("--get", help="获取所有gitlab项目ID和名称")
args = parser.parse_args()
if args.group:
filename="gitlab_"+args.group+"_"+args.start+"_"+args.end+".csv"
print("正在统计数据,gitlab项目组:" + args.group +  ",分支:Master,开始时间:"+ args.start + " 结束时间:" + args.end + " 请耐心等待,这将花费不少时间~")
gitlab_info(args.start + " 00:00:00", args.end + " 23:59:59", args.group)
if args.id:
filename="gitlab_"+str(args.id)+"_"+args.start+"_"+args.end+".csv"
print("正在统计数据,gitlab项目id:" + str(args.id) +  ",分支:Master,开始时间:"+ args.start + " 结束时间:" + args.end + " 请耐心等待,这将花费不少时间~")
gitlab_info_id(args.start + " 00:00:00", args.end + " 23:59:59", args.id)
if args.get:
gitlab_projects_id_name()
exit()
#gitlab_info(args.start + " 00:00:00", args.end + " 23:59:59", args.group)  # 起-止时间
name = []  # Git工具用户名
additions = []  # 新增代码数
deletions = []  # 删除代码数
total = []  # 总计代码数
res = {}
# 生成元组
for i in info:
for key, value in i.items():
if key == "name":
name.append(value)
if key == "additions":
additions.append(value)
if key == "deletions":
deletions.append(value)
if key == "total":
total.append(value)
data = list(zip(name, additions, deletions, total))
# print(data)
# 去重累加
for j in data:
name = j[0]
additions = j[1]
deletions = j[2]
total = j[3]
if name in res.keys():
res[name][0] += additions
res[name][1] += deletions
res[name][2] += total
else:
res.update({name: [additions, deletions, total]})
print("打印结果并保存到文件:"+filename)
timestamp = datetime.datetime.now().timestamp()
date = datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")
sql = "INSERT INTO code_lines (name, project_id, additions, deletions, total, start_time, end_time, update_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
if args.id:
print("按id查询时自动保存到mysql数据库")
with open(filename, mode='w', newline='') as file:
file.write("Git用户名,新增代码数,删除代码数,总计代码数\n")  # 写入表头
# 打印结果
print("Git用户名           新增代码数           删除代码数            总计代码数")
for k in res.keys():
res2=k + "," + str(res[k][0]) + "," +  str(res[k][1]) + "," + str(res[k][2])+"\n"
dname = k
did = args.id
dadd = res[k][0]
ddel = res[k][1]
dall = res[k][2]
dstart = args.start + " 00:00:00"
dend = args.end + " 23:59:59"
data = (dname,did,dadd,ddel,dall,dstart,dend,date)
#data = tuple('\"'+ k + '\"'+"," + str(args.id) + "," + str(res[k][0]) + "," +  str(res[k][1]) + "," + str(res[k][2]) + ","+ '\"' + args.start +" 00:00:00"+ '\"' + ","+ '\"' + args.end + " 23:59:59"+ '\"' + ","+ '\"' + date+ '\"' )
#print(type(data))
#print(data)
print(k + " " * str_format(k) + str(res[k][0]) + " " * str_format(str(res[k][0])) + str(
res[k][1]) + " " * str_format(str(res[k][1])) + str(res[k][2]))
file.write(res2)
if args.id:
to_mysql(sql,data)
file.close()

使用

python git-tj.py --start 2024-05-01 --end 2024-05-29 --group ops	#按项目组统计
python git-tj.py --start 2024-05-01 --end 2024-05-29 --id 346	#按项目ID统计
python git-tj.py --start 2024-05-01 --end 2024-05-29 --get all	#保存项目ID和名称到csv