gitlab按日期统计每个人提交代码量

今天接到老板命令,需要统计每个人每月的代码提交量,纳入每月的绩效考核(神奇的需求)。虽然可以通过查看 GitLab 上每个人的提交记录来统计,但是费时费力,所以这里通过脚本的方式来统计。

方法1:git命令行

git log --since=2024-05-01 --until=2024-05-28 --format='%aN' | sort -u | while read name; do echo -en "$name \t"; git log --author="$name" --pretty=tformat: --numstat | awk '{ add += $1; subs += $2; loc += $1 - $2 } END { printf "added lines: %s, removed lines: %s, total lines: %s\n", add, subs, loc }' -; done

方法2:gitlab api

apt install -y python3-dateutil
vi git-tj.py
#!/usr/bin/python3
# coding=utf8
# @Author: Mario
# @Date  : 2024/06/04
# @Desc  : GitLab 按时间查看各用户代码提交量官方API版
#token1 rw: KJAtxxxWmZ-
#token2 ro: JasKPzzzxmCKG3

#import pandas as pd
import mysql.connector
import json,datetime
import requests
import argparse
from dateutil.parser import parse

gitlab_url = "https://git.gaojinbo.com/"  # GitLab 地址
private_token = "JasKPzzzxmCKG3"  # GitLab Access Tokens(管理员权限)

info = []

headers = {
    'Connection': 'close',
}


# UTC时间转时间戳
def utc_time(time):
    dt = parse(time)
    return int(dt.timestamp())


# 输出格式化
def str_format(txt):
    lenTxt = len(txt)
    lenTxt_utf8 = len(txt.encode('utf-8'))
    size = int((lenTxt_utf8 - lenTxt) / 2 + lenTxt)
    length = 20 - size
    return length


# 获取 GitLab 上的所有项目ID和名称
def gitlab_projects_id_name():
    project_id_name = []
    #project_list = ["ng-kboss2","front","ops2","wechat-card"]
    page = 1
    #mysql插入存在则更新,不存在就插入
    sql = "INSERT INTO project_id_name (project_id, project_name ) VALUES (%s, %s) ON DUPLICATE KEY UPDATE project_name=VALUES(project_name)"
    while True:
        url = gitlab_url + "api/v4/projects/?private_token=" + private_token + "&page=" + str(page) + "&per_page=20"
        while True:
            try:
                res = requests.get(url, headers=headers, timeout=10)
                break
            except Exception as e:
                print(e)
                continue
        projects = json.loads(res.text)
        if len(projects) == 0:
            break
        else:
                for project in projects:
                    res2="project_id:"+str(project["id"])+",project_name:"+project["name"]
                    project_id = project["id"]
                    project_name= project["name"]
                    data = (project_id,project_name)
                    project_id_name.append(res2)
                    print(res2)
                    to_mysql(sql,data)
                page += 1
    #return project_id_name

# 获取 GitLab 上的所有项目
def gitlab_projects(project_group):
    project_ids = []
    project_all = []
    #project_list = ["ng-kboss2","front","ops2","wechat-card"]
    page = 1
    while True:
        url = gitlab_url + "api/v4/projects/?private_token=" + private_token + "&page=" + str(page) + "&per_page=20"
        while True:
            try:
                res = requests.get(url, headers=headers, timeout=10)
                break
            except Exception as e:
                print(e)
                continue
        projects = json.loads(res.text)
        if len(projects) == 0:
            break
        else:
            for project in projects:
                project_all.append(project["id"])
                if project["namespace"]["name"] == project_group and not project["archived"]:
                    project_ids.append(project["id"])
            page += 1
    print("Gitlab总共 " + str(len(project_all)) + " 个项目,在 " + project_group + " 组下,共获取到 " + str(len(project_ids)) + " 个")
    return project_ids

def gitlab_projects_id(project_id):
    projects_ids = []
    projects_ids.append(project_id)
    print("处理项目id " + str(project_id) )
    return projects_ids

# 获取 GitLab 上的项目 id 中的分支
def project_branches(project_id):
    branch_names = ["master"]
    return branch_names
    page = 1
    while True:
        url = gitlab_url + "api/v4/projects/" + str(
            project_id) + "/repository/branches?private_token=" + private_token + "&page=" + str(page) + "&per_page=20"
        while True:
            try:
                res = requests.get(url, headers=headers, timeout=10)
                break
            except Exception as e:
                print(e)
                continue
        branches = json.loads(res.text)
        #'''Debug
        print(url)
        print('--' * 10)
        print(branches)
        print('*' * 10)
        #'''
        if len(branches) == 0:
            break
        else:
            for branch in branches:
                branch_names.append(branch["name"])
            page += 1
    return branch_names


# 获取 GitLab 上的项目分支中的 commits,当 title 或 message 首单词为 Merge 时,表示合并操作,剔除此代码量
def project_commits(project_id, branch, start_time, end_time):
    commit_ids = []
    page = 1
    while True:
        url = gitlab_url + "api/v4/projects/" + str(
            project_id) + "/repository/commits?ref_name=" + branch + "&private_token=" + private_token + "&page=" + str(
            page) + "&per_page=20"
        while True:
            try:
                res = requests.get(url, headers=headers, timeout=10)
                break
            except Exception as e:
                print(e)
                continue
        commits = json.loads(res.text)
        if len(commits) == 0:
            break
        else:
            for commit in commits:
                if "Merge" in commit["title"] or "Merge" in commit["message"] or "合并" in commit["title"] or "合并" in \
                        commit["message"]:  # 不统计合并操作
                    continue
                elif utc_time(commit["authored_date"]) < utc_time(start_time) or utc_time(
                        commit["authored_date"]) > utc_time(end_time):  # 不满足时间区间
                    continue
                else:
                    commit_ids.append(commit["id"])
            page += 1
    return commit_ids


# 根据 commits 的 id 获取代码量
def commit_code(project_id, commit_id):
    global info
    url = gitlab_url + "api/v4/projects/" + str(
        project_id) + "/repository/commits/" + commit_id + "?private_token=" + private_token
    while True:
        try:
            res = requests.get(url, headers=headers, timeout=10)
            break
        except Exception as e:
            print(e)
            continue
    data = json.loads(res.text)
    temp = {"name": data["author_name"], "additions": data["stats"]["additions"],
            "deletions": data["stats"]["deletions"], "total": data["stats"]["total"]}  # Git工具用户名,新增代码数,删除代码数,总计代码数
    info.append(temp)


# GitLab 数据查询
def gitlab_info(start_time, end_time, project_group):
    for project_id in gitlab_projects(project_group):  # 遍历所有项目ID
        for branche_name in project_branches(project_id):  # 遍历每个项目中的分支
            for commit_id in project_commits(project_id, branche_name, start_time, end_time):  # 遍历每个分支中的 commit id
                commit_code(project_id, commit_id)  # 获取代码提交量

def gitlab_info_id(start_time, end_time, project_id):
    for project_id in gitlab_projects_id(project_id):  # 遍历所有项目ID
        for branche_name in project_branches(project_id):  # 遍历每个项目中的分支
            for commit_id in project_commits(project_id, branche_name, start_time, end_time):  # 遍历每个分支中的 commit id
                commit_code(project_id, commit_id)  # 获取代码提交量

def to_mysql(sql,data):
    # 连接到 MySQL 数据库
    mydb = mysql.connector.connect(
        host="192.168.21.195",
        user="root",
        port="13306",
        password="S8XbVyllyl",
        database="gitlab_tj",
        ssl_disabled='True'
        )

    # 创建游标对象
    mycursor = mydb.cursor()

    # 准备要插入的数据
    #data = ("John", "Doe", "john.doe@example.com")

    # 执行插入语句
    #print(type(data))
    #sql = "INSERT INTO code_lines (name, project_id, additions, deletions, total, start_time, end_time, update_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    mycursor.execute(sql, data)

    #sql="select * from code_lines"
    #mycursor.execute(sql)
    #for row in mycursor:
    #   print(row)

    # 提交更改
    mydb.commit()

    # 打印插入的行数
    print("save to mysql",mycursor.rowcount, "record inserted.")

    # 关闭连接
    mydb.close()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument("--start", required=True, help="开始时间,如2024-05-26")
    parser.add_argument("--end", required=True, help="结束时间,如2024-05-29")
    parser.add_argument("--group", help="gitlab组名称")
    parser.add_argument("--id", type=int, help="gitlab项目ID")
    parser.add_argument("--get", help="获取所有gitlab项目ID和名称")

    args = parser.parse_args()
    if args.group:
        filename="gitlab_"+args.group+"_"+args.start+"_"+args.end+".csv"
        print("正在统计数据,gitlab项目组:" + args.group +  ",分支:Master,开始时间:"+ args.start + " 结束时间:" + args.end + " 请耐心等待,这将花费不少时间~")
        gitlab_info(args.start + " 00:00:00", args.end + " 23:59:59", args.group)
    if args.id:
        filename="gitlab_"+str(args.id)+"_"+args.start+"_"+args.end+".csv"
        print("正在统计数据,gitlab项目id:" + str(args.id) +  ",分支:Master,开始时间:"+ args.start + " 结束时间:" + args.end + " 请耐心等待,这将花费不少时间~")
        gitlab_info_id(args.start + " 00:00:00", args.end + " 23:59:59", args.id)
    if args.get:
        gitlab_projects_id_name()
        exit()

    #gitlab_info(args.start + " 00:00:00", args.end + " 23:59:59", args.group)  # 起-止时间
    name = []  # Git工具用户名
    additions = []  # 新增代码数
    deletions = []  # 删除代码数
    total = []  # 总计代码数
    res = {}

    # 生成元组
    for i in info:
        for key, value in i.items():
            if key == "name":
                name.append(value)
            if key == "additions":
                additions.append(value)
            if key == "deletions":
                deletions.append(value)
            if key == "total":
                total.append(value)
    data = list(zip(name, additions, deletions, total))
    # print(data)
    # 去重累加
    for j in data:
        name = j[0]
        additions = j[1]
        deletions = j[2]
        total = j[3]
        if name in res.keys():
            res[name][0] += additions
            res[name][1] += deletions
            res[name][2] += total
        else:
            res.update({name: [additions, deletions, total]})

    print("打印结果并保存到文件:"+filename)
    timestamp = datetime.datetime.now().timestamp()
    date = datetime.datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")
    sql = "INSERT INTO code_lines (name, project_id, additions, deletions, total, start_time, end_time, update_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    if args.id:
        print("按id查询时自动保存到mysql数据库")
    with open(filename, mode='w', newline='') as file:
       file.write("Git用户名,新增代码数,删除代码数,总计代码数\n")  # 写入表头
    # 打印结果
       print("Git用户名           新增代码数           删除代码数            总计代码数")
       for k in res.keys():
          res2=k + "," + str(res[k][0]) + "," +  str(res[k][1]) + "," + str(res[k][2])+"\n"
          dname = k
          did = args.id
          dadd = res[k][0]
          ddel = res[k][1]
          dall = res[k][2]
          dstart = args.start + " 00:00:00"
          dend = args.end + " 23:59:59"
          data = (dname,did,dadd,ddel,dall,dstart,dend,date)
          #data = tuple('\"'+ k + '\"'+"," + str(args.id) + "," + str(res[k][0]) + "," +  str(res[k][1]) + "," + str(res[k][2]) + ","+ '\"' + args.start +" 00:00:00"+ '\"' + ","+ '\"' + args.end + " 23:59:59"+ '\"' + ","+ '\"' + date+ '\"' )
          #print(type(data))
          #print(data)
          print(k + " " * str_format(k) + str(res[k][0]) + " " * str_format(str(res[k][0])) + str(
            res[k][1]) + " " * str_format(str(res[k][1])) + str(res[k][2]))
          file.write(res2)
          if args.id:
            to_mysql(sql,data)
       file.close()

使用

python git-tj.py --start 2024-05-01 --end 2024-05-29 --group ops	#按项目组统计
python git-tj.py --start 2024-05-01 --end 2024-05-29 --id 346	#按项目ID统计
python git-tj.py --start 2024-05-01 --end 2024-05-29 --get all	#保存项目ID和名称到csv