我想编写一个 shell 脚本,通过传输命令行将源数据集复制到目标数据集。
注意:复制应该在数据集级别进行,因为我们在 BigQuery 中有数千个数据集。
考虑以下方法:
#!/bin/bash
# Copies every table of every dataset in one project into a single target
# dataset of that same project, issuing one `bq cp -f` per table.
#
# Configuration: replace the three placeholder values below. Max_Results may
# be overridden from the environment.
# Exit status: 0 when every copy succeeded; 1 when the dataset listing failed
# or at least one table could not be listed or copied.
#
# NOTE: `bq cp -f` overwrites an existing destination table, so tables that
# share a name across source datasets replace each other in the target.
Project_Id='input your project name'
Location='input your location'
Tar_Data_Set='input you targeted dataset name'
# Row limit handed to `bq ls -n` for both datasets and tables; raise it when a
# listing could be longer than this.
Max_Results=${Max_Results:-10000}

# Capture the listing first so a failing `bq ls` is detected before any copy
# is attempted (a status check cannot sit between `for` and `do`).
if ! dataset_listing=$(bq ls -n "${Max_Results}" --project_id="${Project_Id}" --location="${Location}"); then
  echo "Input parameter error" >&2
  exit 1
fi

failures=0

# The first two lines of the default `bq ls` output are the column header and
# its underline; the first column of every remaining row is the ID.
# The loops read from dedicated descriptors (3 and 4) so that `bq` never
# consumes the listing through its own stdin.
while IFS= read -r -u 3 Data_Set; do
  # Never copy the target dataset onto itself.
  if [[ "${Data_Set}" == "${Tar_Data_Set}" ]]; then
    continue
  fi

  # Qualify the dataset with the project so the listing does not depend on
  # whatever default project the bq client is configured with.
  if ! table_listing=$(bq ls -n "${Max_Results}" "${Project_Id}:${Data_Set}"); then
    echo "Failed to list tables in ${Project_Id}:${Data_Set}" >&2
    failures=$((failures + 1))
    continue
  fi

  while IFS= read -r -u 4 Table_Name; do
    echo "Copying ${Project_Id}:${Data_Set}.${Table_Name} to ${Project_Id}:${Tar_Data_Set}.${Table_Name}"
    if ! bq cp -f \
      "${Project_Id}:${Data_Set}.${Table_Name}" \
      "${Project_Id}:${Tar_Data_Set}.${Table_Name}"; then
      echo "Failed to copy ${Project_Id}:${Data_Set}.${Table_Name}" >&2
      failures=$((failures + 1))
    fi
  done 4< <(awk 'NR > 2 && NF { print $1 }' <<<"${table_listing}")
done 3< <(awk 'NR > 2 && NF { print $1 }' <<<"${dataset_listing}")

if ((failures > 0)); then
  echo "Finished with ${failures} failure(s)." >&2
  exit 1
fi
echo "All tables copied."
这里,我们假设表是在同一个项目 ID 下进行复制的。
#!/bin/bash
# (Written by Google Gemini)
#
# Clones every table of a BigQuery dataset into another dataset with
# `bq cp --clone -n`, creating the destination dataset first when `bq show`
# cannot find it.
#
# Usage: <script> <source> <destination>
#   Each argument is either dataset_name or project_id.dataset_name. Without a
#   project prefix, the project printed by `gcloud config get-value project`
#   is used.
# Environment:
#   MAX_TABLES  row limit passed to `bq ls -n` (default 10000).
# Exit status: 0 when every clone succeeded, 1 on bad arguments or any failure.

# Without -n, `bq ls` returns only its default page of rows, which would
# silently leave the remaining tables uncloned.
MAX_TABLES=${MAX_TABLES:-10000}

# Prints the active gcloud project; fails when none is configured so that a
# malformed ":dataset" reference is never built.
default_project() {
  local project
  project=$(gcloud config get-value project) || return 1
  [[ -n "${project}" ]] || return 1
  printf '%s\n' "${project}"
}

# Both dataset arguments are required.
if [[ -z "${1:-}" || -z "${2:-}" ]]; then
  echo "Usage: $0 <source_dataset_name or project_id.source_dataset_name> <destination_dataset_name or project_id.destination_dataset_name>" >&2
  exit 1
fi
source_dataset="$1"
destination_dataset="$2"

# Split "project.dataset" into its first and second dot-separated fields.
if [[ "${source_dataset}" == *.* ]]; then
  source_project=${source_dataset%%.*}
  source_dataset=${source_dataset#*.}
  source_dataset=${source_dataset%%.*}
elif ! source_project=$(default_project); then
  echo "No project given for the source dataset and no default gcloud project is set." >&2
  exit 1
fi

if [[ "${destination_dataset}" == *.* ]]; then
  destination_project=${destination_dataset%%.*}
  destination_dataset=${destination_dataset#*.}
  destination_dataset=${destination_dataset%%.*}
elif ! destination_project=$(default_project); then
  echo "No project given for the destination dataset and no default gcloud project is set." >&2
  exit 1
fi

echo "Cloning dataset '$source_project:$source_dataset' to '$destination_project:$destination_dataset'"

# Create the destination dataset when `bq show` cannot find it.
if ! bq show --dataset "$destination_project:$destination_dataset" &>/dev/null; then
  echo "Creating destination dataset '$destination_project:$destination_dataset'..."
  if ! bq mk --dataset "$destination_project:$destination_dataset"; then
    echo "Failed to create destination dataset '$destination_project:$destination_dataset'" >&2
    exit 1
  fi
fi

# Capture the listing up front so that a failing `bq ls` aborts the run
# instead of looking like an empty dataset.
if ! table_listing=$(bq ls -n "${MAX_TABLES}" "$source_project:$source_dataset"); then
  echo "Failed to list tables in '$source_project:$source_dataset'" >&2
  exit 1
fi

failed=0

# The first two lines of the default `bq ls` output are the column header and
# its underline; the first column of every remaining row is the table ID.
# The loop reads from descriptor 3 (not a pipeline) so that the failure
# counter survives the loop and `bq cp` cannot consume the listing via stdin.
while IFS= read -r -u 3 tableId; do
  # Report, rather than silently drop, rows that do not look like a table ID.
  if [[ ! "$tableId" =~ ^[a-zA-Z0-9_-]+$ ]]; then
    echo "Skipping unsupported table ID '$tableId'" >&2
    continue
  fi

  echo "Cloning table '$tableId'..."
  # -n (no clobber) leaves an already existing destination table untouched.
  if ! bq cp --clone -n \
    "$source_project:$source_dataset.$tableId" \
    "$destination_project:$destination_dataset.$tableId"; then
    echo "Failed to clone table '$tableId'" >&2
    failed=$((failed + 1))
  fi
done 3< <(awk 'NR > 2 && NF { print $1 }' <<<"${table_listing}")

# Only claim success when every clone actually succeeded.
if ((failed > 0)); then
  echo "Dataset clone finished with ${failed} failed table(s)." >&2
  exit 1
fi
echo "Dataset cloned successfully!"