文档相关功能

This commit is contained in:
wanjia 2025-04-02 12:25:40 +08:00
parent a90d31c420
commit 5eb4ccd7fd
5 changed files with 765 additions and 621 deletions

View File

@ -1,59 +0,0 @@
# Build the deployment archive knowledge_system.tar.gz from the project tree.
# Run this script from the project root directory.

# Path to the 7-Zip executable; adjust it to the local installation.
$7zipPath = "D:\7zip\7-Zip\7z.exe"
# $7zipPath = "C:\Program Files\7-Zip\7z.exe"  # typical default install location

# Stop before staging anything if 7-Zip is not where we expect it.
if (-not (Test-Path -Path $7zipPath -PathType Leaf)) {
    throw "未找到7-Zip可执行文件: $7zipPath"
}

# Leftovers from an aborted run would otherwise be merged into the new
# staging directory or appended to the intermediate tar archive.
Remove-Item -Path "deploy_temp" -Recurse -Force -ErrorAction SilentlyContinue
Remove-Item -Path "knowledge_system.tar" -Force -ErrorAction SilentlyContinue

try {
    # Create the temporary staging directory.
    New-Item -Path "deploy_temp" -ItemType Directory -Force

    # Copy the project into the staging directory.
    Copy-Item -Path "role_based_system" -Destination "deploy_temp\" -Recurse -Force
    Copy-Item -Path "user_management" -Destination "deploy_temp\" -Recurse -Force
    Copy-Item -Path "manage.py" -Destination "deploy_temp\" -Force
    Copy-Item -Path "requirements.txt" -Destination "deploy_temp\" -Force
    Copy-Item -Path "*.md" -Destination "deploy_temp\" -Force -ErrorAction SilentlyContinue

    # Drop Python bytecode caches everywhere (this includes the ones inside
    # migrations directories).
    Get-ChildItem -Path "deploy_temp" -Recurse -Filter "__pycache__" -Directory | Remove-Item -Recurse -Force
    Get-ChildItem -Path "deploy_temp" -Recurse -Filter "*.pyc" | Remove-Item -Force

    # Directories that must never ship. "migrations" is deliberately NOT in
    # this list: the migration modules have to be deployed, only their
    # bytecode (already removed above) is unwanted.
    $excludeDirs = @(
        ".git", ".idea", ".vscode",
        "venv", ".venv", "env", "__pycache__"
    )
    foreach ($dir in $excludeDirs) {
        Get-ChildItem -Path "deploy_temp" -Recurse -Directory -Filter $dir |
            Remove-Item -Recurse -Force -ErrorAction SilentlyContinue
    }

    # Files that must never ship (compiled artefacts, local databases, logs,
    # old archives, machine-specific settings, backups).
    $excludeFiles = @(
        "*.pyc", "*.pyo", "*.pyd", "*.so", "*.dll",
        "*.db", "*.sqlite3", "*.log", "*.zip", "*.tar.gz",
        "local_settings.py", "*.bak"
    )
    foreach ($pattern in $excludeFiles) {
        Get-ChildItem -Path "deploy_temp" -Recurse -Filter $pattern | Remove-Item -Force -ErrorAction SilentlyContinue
    }

    # Package with 7-Zip: tar first, then gzip the tar. Each step is checked
    # so a failed run can never be reported as a success.
    & $7zipPath a -ttar knowledge_system.tar ".\deploy_temp\*"
    if ($LASTEXITCODE -ne 0) {
        throw "7-Zip创建tar失败 (exit code $LASTEXITCODE)"
    }

    # Replace any previous package only once the new tar exists.
    Remove-Item -Path "knowledge_system.tar.gz" -Force -ErrorAction SilentlyContinue
    & $7zipPath a -tgzip knowledge_system.tar.gz knowledge_system.tar
    if ($LASTEXITCODE -ne 0) {
        throw "7-Zip创建tar.gz失败 (exit code $LASTEXITCODE)"
    }
}
finally {
    # Clean up temporary artefacts whether or not packaging succeeded.
    Remove-Item -Path "knowledge_system.tar" -Force -ErrorAction SilentlyContinue
    Remove-Item -Path "deploy_temp" -Recurse -Force -ErrorAction SilentlyContinue
}

Write-Host "部署包已创建: knowledge_system.tar.gz" -ForegroundColor Green

View File

@ -2,6 +2,8 @@ import json
import django import django
import os import os
import sys import sys
import pandas as pd
from django.db import transaction
# 设置 Django 环境 # 设置 Django 环境
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@ -30,7 +32,7 @@ def extract_field_value(field_value):
return field_value return field_value
def save_to_database(record): def save_to_database(record):
"""保存记录到数据库""" """从飞书多维表格保存记录到数据库"""
fields = record.fields fields = record.fields
record_id = record.record_id record_id = record.record_id
@ -57,7 +59,7 @@ def save_to_database(record):
'system_categories': extract_field_value(fields.get('系统展示的带货品类', [])), 'system_categories': extract_field_value(fields.get('系统展示的带货品类', [])),
'actual_categories': extract_field_value(fields.get('实际高播放量带货品类', [])), 'actual_categories': extract_field_value(fields.get('实际高播放量带货品类', [])),
'human_categories': fields.get('达人标想要货品类', ''), 'human_categories': fields.get('达人标想要货品类', ''),
'creator_base': '', # 如果有这个字段,添加相应的处理 'creator_base': '',
'notes': extract_field_value(fields.get('父记录', '')), 'notes': extract_field_value(fields.get('父记录', '')),
} }
@ -131,7 +133,92 @@ def fetch_all_records(client, app_token, table_id, user_access_token):
print(f"最终获取到 {len(total_records)} 条记录") print(f"最终获取到 {len(total_records)} 条记录")
return total_records return total_records
def update_from_excel(excel_file_path):
    """Update existing FeishuCreator rows from an Excel file, matched by Handle.

    The sheet must contain a ``Handle`` column. Every other column whose name
    equals a FeishuCreator model field (except id, record_id, created_at and
    updated_at) is copied onto the matching record. Empty cells are ignored,
    rows whose handle is empty or unknown are skipped, and nothing is ever
    created. Progress and a final summary are printed to stdout.

    Args:
        excel_file_path: Path of the Excel file, passed to ``pd.read_excel``.

    Returns:
        None. Failures are reported on stdout rather than raised.
    """
    try:
        print(f"开始读取Excel文件: {excel_file_path}")
        df = pd.read_excel(excel_file_path)

        if 'Handle' not in df.columns:
            print("错误: Excel文件中缺少'Handle'")
            return

        update_count = 0
        skip_count = 0
        error_count = 0

        # Columns that may be written: present in the sheet AND a model field.
        excluded_fields = {'id', 'record_id', 'created_at', 'updated_at'}
        model_fields = {f.name for f in FeishuCreator._meta.get_fields()} - excluded_fields
        valid_columns = set(df.columns) & model_fields
        print(f"可更新的列: {valid_columns}")

        with transaction.atomic():
            for index, row in df.iterrows():
                # Bound up front so the error message below can always format it.
                handle = None
                try:
                    # Per-row savepoint: a database error caught inside a plain
                    # atomic block would leave the outer transaction unusable
                    # and make every following row fail as well.
                    with transaction.atomic():
                        raw_handle = row['Handle']
                        # An empty cell is NaN; str(NaN) would be the bogus
                        # handle 'nan' instead of an empty string.
                        handle = '' if pd.isna(raw_handle) else str(raw_handle).strip()
                        if not handle:
                            # +2: one for the header row, one for 1-based rows.
                            print(f"跳过第{index+2}行: Handle为空")
                            skip_count += 1
                            continue

                        # Look up the existing record; unknown handles are skipped.
                        creator = FeishuCreator.objects.filter(handle=handle).first()
                        if not creator:
                            print(f"跳过Handle为'{handle}'的记录: 数据库中不存在")
                            skip_count += 1
                            continue

                        # Collect the non-empty cells to write.
                        update_data = {}
                        for column in valid_columns:
                            if column == 'Handle':
                                continue

                            value = row[column]
                            if pd.isna(value):
                                continue

                            # Normalise the cell to what the model stores.
                            if isinstance(value, (list, dict)):
                                value = json.dumps(value)
                            elif isinstance(value, (int, float)):
                                if column in ['fans_count']:
                                    value = int(value)
                                else:
                                    value = str(value)
                            else:
                                value = str(value).strip()

                            # NOTE(review): falsy values (including a
                            # fans_count of 0) are dropped here, as before.
                            if value:
                                update_data[column] = value

                        if update_data:
                            for field, value in update_data.items():
                                setattr(creator, field, value)
                            creator.save()
                            update_count += 1
                            print(f"已更新Handle为'{handle}'的记录")
                        else:
                            skip_count += 1
                            print(f"跳过Handle为'{handle}'的记录: 无需更新")

                except Exception as e:
                    # Best effort: one bad row must not abort the whole import.
                    error_count += 1
                    print(f"处理Handle'{handle}'时出错: {str(e)}")

        print("\nExcel更新完成统计信息")
        print(f"更新记录数:{update_count}")
        print(f"跳过记录数:{skip_count}")
        print(f"错误记录数:{error_count}")

    except Exception as e:
        # Covers unreadable files and anything raised outside the row loop.
        print(f"处理Excel文件时出错: {str(e)}")
def sync_from_feishu():
"""从飞书多维表格同步数据"""
# 创建client # 创建client
client = lark.Client.builder() \ client = lark.Client.builder() \
.enable_set_token(True) \ .enable_set_token(True) \
@ -143,15 +230,13 @@ def main():
TABLE_ID = "tbl3oikG3F8YYtVA" TABLE_ID = "tbl3oikG3F8YYtVA"
USER_ACCESS_TOKEN = "u-ecM5BmzKx4uHz3sG0FouQSk1l9kxgl_3Xa00l5Ma24Jy" USER_ACCESS_TOKEN = "u-ecM5BmzKx4uHz3sG0FouQSk1l9kxgl_3Xa00l5Ma24Jy"
# 获取所有记录 print("开始从飞书同步数据...")
print("开始获取所有记录...")
all_records = fetch_all_records(client, APP_TOKEN, TABLE_ID, USER_ACCESS_TOKEN) all_records = fetch_all_records(client, APP_TOKEN, TABLE_ID, USER_ACCESS_TOKEN)
if not all_records: if not all_records:
print("未获取到任何记录") print("未获取到任何记录")
return return
# 更新数据库
print("\n开始更新数据库...") print("\n开始更新数据库...")
created_count = 0 created_count = 0
updated_count = 0 updated_count = 0
@ -162,7 +247,7 @@ def main():
if creator: if creator:
if created: if created:
created_count += 1 created_count += 1
if created_count % 10 == 0: # 每10条才打印一次避免输出过多 if created_count % 10 == 0:
print(f"已创建 {created_count} 条记录...") print(f"已创建 {created_count} 条记录...")
else: else:
updated_count += 1 updated_count += 1
@ -172,13 +257,34 @@ def main():
error_count += 1 error_count += 1
print(f"处理记录失败") print(f"处理记录失败")
# 打印统计信息 print("\n飞书同步完成!统计信息:")
print("\n更新完成!统计信息:")
print(f"新建记录:{created_count}") print(f"新建记录:{created_count}")
print(f"更新记录:{updated_count}") print(f"更新记录:{updated_count}")
print(f"错误记录:{error_count}") print(f"错误记录:{error_count}")
print(f"总记录数:{len(all_records)}") print(f"总记录数:{len(all_records)}")
def main():
    """Command-line entry point.

    Reads the sub-command from ``sys.argv``:

    * ``sync`` runs ``sync_from_feishu()``.
    * ``excel <path>`` runs ``update_from_excel(path)``; exactly one path
      argument is required.

    With no sub-command, an unknown one, or a wrong argument count for
    ``excel``, a usage message is printed and nothing else happens.
    """
    args = sys.argv[1:]

    # No sub-command at all: show the general usage text.
    if not args:
        print("使用方法:")
        print("1. 从飞书同步: python feishu.py sync")
        print("2. 从Excel更新: python feishu.py excel <excel文件路径>")
        return

    command = args[0]

    if command == 'sync':
        sync_from_feishu()
        return

    if command == 'excel':
        # The sub-command must be followed by exactly one file path.
        if len(args) != 2:
            print("使用方法: python feishu.py excel <excel文件路径>")
            return
        update_from_excel(args[1])
        return

    # Anything else is an unknown sub-command.
    print("无效的命令。使用方法:")
    print("1. 从飞书同步: python feishu.py sync")
    print("2. 从Excel更新: python feishu.py excel <excel文件路径>")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -0,0 +1,35 @@
# Generated by Django 5.1.5 on 2025-03-31 14:03
import django.db.models.deletion
import uuid
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the ``knowledge_base_documents`` table (KnowledgeBaseDocument)."""

    # Must run after the previous user_management migration.
    dependencies = [
        ('user_management', '0003_alter_feishucreator_handle'),
    ]

    operations = [
        migrations.CreateModel(
            name='KnowledgeBaseDocument',
            fields=[
                (
                    'id',
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                (
                    'document_id',
                    models.CharField(max_length=100, verbose_name='文档ID'),
                ),
                (
                    'document_name',
                    models.CharField(max_length=255, verbose_name='文档名称'),
                ),
                (
                    'external_id',
                    models.CharField(max_length=100, verbose_name='外部文档ID'),
                ),
                (
                    'status',
                    models.CharField(
                        choices=[('active', '有效'), ('deleted', '已删除')],
                        default='active',
                        max_length=20,
                        verbose_name='状态',
                    ),
                ),
                (
                    'create_time',
                    models.DateTimeField(auto_now_add=True, verbose_name='创建时间'),
                ),
                (
                    'update_time',
                    models.DateTimeField(auto_now=True, verbose_name='更新时间'),
                ),
                (
                    'knowledge_base',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='kb_documents',
                        to='user_management.knowledgebase',
                        verbose_name='知识库',
                    ),
                ),
            ],
            options={
                'verbose_name': '知识库文档',
                'verbose_name_plural': '知识库文档',
                'db_table': 'knowledge_base_documents',
                # Index names are the auto-generated ones and must stay as-is.
                'indexes': [
                    models.Index(
                        fields=['knowledge_base', 'status'],
                        name='knowledge_b_knowled_a4db1b_idx',
                    ),
                    models.Index(
                        fields=['document_id'],
                        name='knowledge_b_documen_dab90f_idx',
                    ),
                    models.Index(
                        fields=['external_id'],
                        name='knowledge_b_externa_b0060c_idx',
                    ),
                ],
                # A document id may appear only once per knowledge base.
                'unique_together': {('knowledge_base', 'document_id')},
            },
        ),
    ]

View File

@ -669,3 +669,41 @@ class FeishuCreator(models.Model):
db_table = 'feishu_creators' db_table = 'feishu_creators'
verbose_name = '创作者数据' verbose_name = '创作者数据'
verbose_name_plural = '创作者数据' verbose_name_plural = '创作者数据'
class KnowledgeBaseDocument(models.Model):
    """Association between a knowledge base and one of its documents.

    Each row records a document's id, display name and external id for a
    given ``KnowledgeBase``. ``status`` is either ``active`` or ``deleted``
    (presumably used for soft deletion; confirm against the callers).
    A ``document_id`` is unique within one knowledge base.
    """

    # Random UUID primary key, not editable.
    id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False)
    # Owning knowledge base; rows are removed together with it (CASCADE).
    knowledge_base = models.ForeignKey(
        'KnowledgeBase',
        verbose_name='知识库',
        related_name='kb_documents',
        on_delete=models.CASCADE,
    )
    document_id = models.CharField(verbose_name='文档ID', max_length=100)
    document_name = models.CharField(verbose_name='文档名称', max_length=255)
    external_id = models.CharField(verbose_name='外部文档ID', max_length=100)
    status = models.CharField(
        verbose_name='状态',
        choices=[
            ('active', '有效'),
            ('deleted', '已删除'),
        ],
        default='active',
        max_length=20,
    )
    # Set once on insert / refreshed on every save, respectively.
    create_time = models.DateTimeField(verbose_name='创建时间', auto_now_add=True)
    update_time = models.DateTimeField(verbose_name='更新时间', auto_now=True)

    class Meta:
        verbose_name = '知识库文档'
        verbose_name_plural = '知识库文档'
        db_table = 'knowledge_base_documents'
        unique_together = ['knowledge_base', 'document_id']
        indexes = [
            models.Index(fields=['knowledge_base', 'status']),
            models.Index(fields=['document_id']),
            models.Index(fields=['external_id']),
        ]

    def __str__(self):
        """Return ``"<knowledge base name> - <document name>"``."""
        kb_name = self.knowledge_base.name
        return "{} - {}".format(kb_name, self.document_name)

File diff suppressed because it is too large Load Diff