fujian_water_biz_doc/scripts/ai-weekly-audit-diff.sh

#!/usr/bin/env bash
# Weekly AI audit diff generator.
# Usage: ai-weekly-audit-diff.sh [--output <file>] [--strict]
#   --output <file>  where to write the Markdown report (defaults to
#                    docs/design/00_Management/14_AI_Audit_Diff_Latest.md
#                    under the repository root)
#   --strict         enable strict mode (sets strict_mode to "true")
set -euo pipefail

# The repository root is the parent of the directory holding this script.
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
default_output="$repo_root/docs/design/00_Management/14_AI_Audit_Diff_Latest.md"
output_file="$default_output"
strict_mode="false"

#######################################
# Print the one-line usage string to stdout.
#######################################
print_usage() {
  echo "用法: $0 [--output <输出文件>] [--strict]"
}

#######################################
# Parse command-line options into the globals output_file / strict_mode.
# Globals:   output_file (written), strict_mode (written)
# Arguments: the script's positional parameters
# Outputs:   diagnostics on stderr for invalid input
# Returns:   exits the shell with status 1 on an unknown option or when
#            --output is given without a (non-empty) value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --output)
        # ${2:-} keeps `set -u` from aborting with a cryptic
        # "unbound variable" message when the value is missing.
        if [[ -z "${2:-}" ]]; then
          echo "参数 --output 需要指定输出文件" >&2
          print_usage >&2
          exit 1
        fi
        output_file="$2"
        shift 2
        ;;
      --strict)
        strict_mode="true"
        shift
        ;;
      *)
        echo "未知参数: $1" >&2
        print_usage >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Running counter used to number the recorded findings.
diff_id=0

# Findings are buffered in a temporary file that is removed when the
# script exits, whatever the exit path.
tmp_diff="$(mktemp)"
trap 'rm -f "$tmp_diff"' EXIT

# Documents to scan, as paths relative to the repository root.
# The order here determines the order in which files are scanned.
formal_files=()
formal_files+=("docs/design/01_Overview/03_Summary_Design.md")
formal_files+=("docs/design/02_Detailed_Design/01_Detailed_Design.md")
formal_files+=(
  "docs/design/03_Technical_Design/01_Database_Design.md"
  "docs/design/03_Technical_Design/03_Interface_Design.md"
  "docs/design/03_Technical_Design/04_Security_Design.md"
  "docs/design/03_Technical_Design/05_Deployment_Design.md"
)
formal_files+=(
  "docs/design/00_Management/04_Writing_Guide.md"
  "docs/design/00_Management/10_AI_Retrieval_Whitelist.md"
  "docs/design/00_Management/11_Main_Doc_Chapter_Index.md"
)

# Archive directory and the tag index file expected inside it.
archive_root="$repo_root/docs/design/04_Appendix/Archive"
archive_tag_index="${archive_root}/00_Archive_Tag_Index.md"

#######################################
# Append one finding as a 7-column, tab-separated record to the buffer file.
# Columns: id, category, file, line number, symptom, suggestion, severity.
# Globals:   diff_id (incremented), tmp_diff (appended to)
# Arguments: $1 category, $2 file, $3 line number, $4 symptom text,
#            $5 suggestion text, $6 severity
#######################################
add_diff() {
  local category="$1"
  local file="$2"
  local line_no="$3"
  local symptom="$4"
  local suggestion="$5"
  local severity="$6"
  local tab=$'\t'
  local cr=$'\r'

  diff_id=$((diff_id + 1))

  # The record is tab-delimited, so a tab inside free text would add extra
  # columns and shift the severity field; a carriage return (CRLF sources)
  # would end up inside the rendered table cell. Neutralise both.
  symptom="${symptom//$tab/ }"
  symptom="${symptom//$cr/}"
  suggestion="${suggestion//$tab/ }"
  suggestion="${suggestion//$cr/}"

  # Escape pipes so the text cannot break a Markdown table cell.
  symptom="${symptom//|/\\|}"
  suggestion="${suggestion//|/\\|}"

  printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
    "$diff_id" "$category" "$file" "$line_no" "$symptom" "$suggestion" "$severity" >>"$tmp_diff"
}

#######################################
# Search every existing file listed in formal_files for a regex and record
# one finding per matching line through add_diff.
# Globals:   formal_files, repo_root (read)
# Arguments: $1 category label
#            $2 regex handed to rg (PCRE2 mode)
#            $3 suggestion text stored with each finding
#            $4 severity label stored with each finding
# Outputs:   an error message on stderr when rg itself fails
# Returns:   0 on success; rg's exit status when it is greater than 1
#            (status 1 only means "no match" and is not an error)
#######################################
scan_pattern() {
  local category="$1"
  local pattern="$2"
  local suggestion="$3"
  local severity="$4"
  local rel_file abs_file matches rg_status row line_no content

  for rel_file in "${formal_files[@]}"; do
    abs_file="$repo_root/$rel_file"
    [[ -f "$abs_file" ]] || continue

    # Capture the output first so rg's exit status is visible. A missing rg
    # binary, a build without PCRE2 or an unreadable file must not be
    # reported as "no findings".
    # --no-filename: only one file is searched, so each row is
    # "<line>:<content>" and a ':' in the absolute path cannot confuse parsing.
    rg_status=0
    matches="$(rg -n --no-heading --color never --no-filename --pcre2 \
      -e "$pattern" -- "$abs_file")" || rg_status=$?
    if ((rg_status > 1)); then
      echo "rg 扫描失败 (退出码 $rg_status): $rel_file" >&2
      return "$rg_status"
    fi

    while IFS= read -r row; do
      [[ -z "$row" ]] && continue
      line_no="${row%%:*}"
      content="${row#*:}"

      # Database-category lines containing this phrase are skipped.
      if [[ "$category" == "数据库口径" && "$content" == *"替换为达梦数据库"* ]]; then
        continue
      fi

      add_diff "$category" "$rel_file" "$line_no" "$content" "$suggestion" "$severity"
    done <<<"$matches"
  done

  return 0
}

# Pattern rules 1-4, stored as a flat table with four cells per rule:
#   category | regex | suggestion | severity
# Rule 1: mixed system names.
# Rule 2: legacy database wording.
# Rule 3: placeholder interface IDs.
# Rule 4: leftover old module IDs.
audit_rules=(
  "系统名称口径"
  "营业收费系统|数智营收管理系统|客户服务平台"
  "统一使用“福建水务营收系统”（引用原文除外）"
  "P2"

  "数据库口径"
  "OpenGauss|openGauss"
  "统一为“达梦数据库 8.0+”，历史记录场景需显式标注"
  "P1"

  "接口编号规范"
  "IF-XXX|EXT-XXX"
  "使用 IF-UP/IF-REV/IF-CS/IF-METER/IF-INST/IF-EXT 规则"
  "P1"

  "模块编号规范"
  "SERVICE-[0-9]{3}"
  "统一使用 CS-00x 编号规则"
  "P2"
)

# Run the rules in table order, one scan_pattern call per rule.
for ((rule_idx = 0; rule_idx < ${#audit_rules[@]}; rule_idx += 4)); do
  scan_pattern \
    "${audit_rules[rule_idx]}" \
    "${audit_rules[rule_idx + 1]}" \
    "${audit_rules[rule_idx + 2]}" \
    "${audit_rules[rule_idx + 3]}"
done

# Rule 5: Archive tag coverage.
# When the tag index exists, every other Markdown file under the archive
# directory must appear in it as its repo-relative path wrapped in backticks.
# When the index is missing, a single P1 finding is recorded instead.
if [[ -f "$archive_tag_index" ]]; then
  while IFS= read -r abs_md; do
    rel_md="${abs_md#"$repo_root/"}"

    # Already listed in the index: nothing to report for this file.
    grep -Fq "\`$rel_md\`" "$archive_tag_index" && continue

    add_diff \
      "Archive 标签覆盖" \
      "$rel_md" \
      "-" \
      "未在 Archive 标签索引中登记" \
      "执行 make archive-tag-index 更新标签清单" \
      "P2"
  done < <(find "$archive_root" -type f -name "*.md" ! -name "00_Archive_Tag_Index.md" | sort)
else
  add_diff \
    "Archive 标签覆盖" \
    "docs/design/04_Appendix/Archive/00_Archive_Tag_Index.md" \
    "-" \
    "缺少 Archive 标签索引文件" \
    "先执行 make archive-tag-index 生成标签索引" \
    "P1"
fi

#######################################
# Count buffered findings whose severity column (7th field) equals $1.
# Globals:   tmp_diff (read)
# Arguments: $1 severity label
# Outputs:   the count on stdout (0 when there are none)
#######################################
count_severity() {
  awk -F'\t' -v level="$1" '$7 == level { hits++ } END { print hits + 0 }' "$tmp_diff"
}

# One buffered line per finding, so the line count is the total.
total_count="$(wc -l < "$tmp_diff" | tr -d ' ')"
p1_count="$(count_severity "P1")"
p2_count="$(count_severity "P2")"

# Make sure the report's parent directory exists before writing.
output_dir="$(dirname "$output_file")"
mkdir -p "$output_dir"

# Render the Markdown report: header and summary table, the findings table,
# then the fixed follow-up section.
{
  cat <<EOF
# 福建水务营收系统 AI 抽检差异清单（自动生成）

## 1. 生成信息

| 项目 | 内容 |
| --- | --- |
| 生成时间 | $(date '+%Y-%m-%d %H:%M:%S') |
| 扫描范围 | P0 主文档 + P1 治理入口（非 Archive） |
| 差异总数 | $total_count |
| P1 差异数 | $p1_count |
| P2 差异数 | $p2_count |

## 2. 差异明细

| 编号 | 类别 | 文件 | 行号 | 现象 | 建议 | 级别 |
| --- | --- | --- | --- | --- | --- | --- |
EOF

  if [[ "$total_count" -ne 0 ]]; then
    # One table row per buffered tab-separated record.
    while IFS=$'\t' read -r col_id col_category col_file col_line col_symptom col_suggestion col_severity; do
      printf '| %s | %s | `%s` | %s | %s | %s | %s |\n' \
        "$col_id" "$col_category" "$col_file" "$col_line" \
        "$col_symptom" "$col_suggestion" "$col_severity"
    done < "$tmp_diff"
  else
    # Placeholder row so the table is never empty.
    echo "| - | - | - | - | 未发现口径差异 | 无需修复 | - |"
  fi

  # Quoted delimiter: the backticks below must stay literal.
  cat <<'EOF'

## 3. 建议动作

1. 先修复 P1 差异，再处理 P2 优化项。
2. 修复后执行：`make check-links`、`make validate-mermaid`、`make check-ai-governance`。
3. 周检归档可复用 `docs/design/00_Management/12_AI_Weekly_Audit_Template.md`。
EOF
} >"$output_file"

# Console summary: report location and the finding counts.
printf '%s\n' "✅ AI 抽检差异清单已生成: $output_file"
printf '%s\n' "   差异总数: $total_count (P1=$p1_count, P2=$p2_count)"

# In strict mode any finding makes the script exit with status 1.
if [[ "$strict_mode" == "true" ]]; then
  if [[ "$total_count" -gt 0 ]]; then
    printf '%s\n' "❌ 严格模式下存在差异，返回失败。"
    exit 1
  fi
fi