Compare commits
7 Commits
10e14b41cc
...
6bc9772f58
| Author | SHA1 | Date | |
|---|---|---|---|
| 6bc9772f58 | |||
| 1aa0388e80 | |||
| 08ffabe0d6 | |||
| d5bfda08c2 | |||
| 6800d6f979 | |||
| af5ebf21f1 | |||
| 700f36c2c5 |
8
Example.info.json
Normal file
8
Example.info.json
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"id": "",
|
||||
"channel_id": "Change To Channel ID/username",
|
||||
"uploader": "Change To Channel ID/username",
|
||||
"title": "Example",
|
||||
"upload_date": "",
|
||||
"thumbnail": null
|
||||
}
|
||||
74
README.md
74
README.md
@ -1 +1,73 @@
|
||||
Test
|
||||
# Tube-Archivist Scripts
|
||||
|
||||
Small collection of Bash helpers used to prepare offline / archived YouTube videos for import into TubeArchivist. Written for Debian-like systems; should work on other Linux distributions with Bash and standard GNU utilities.
|
||||
|
||||
---
|
||||
|
||||
## Goal
|
||||
Normalize filenames and create accompanying metadata (.info.json) so TubeArchivist can ingest local archives (especially those from archive.org or other offline sources).
|
||||
|
||||
Example input filename:
|
||||
`20170311 (5XtCZ1Fa9ag) Terry A Davis Live Stream.mp4`
|
||||
|
||||
Resulting filename and sidecar JSON:
|
||||
- `20170311 Terry A Davis Live Stream [5XtCZ1Fa9ag].mp4`
|
||||
- `20170311 Terry A Davis Live Stream [5XtCZ1Fa9ag].info.json`
|
||||
|
||||
---
|
||||
|
||||
## How it works / Usage
|
||||
1. Put all the scripts and the `Example.info.json` template in the directory with your video files (the scripts do not recurse into subdirectories).
|
||||
2. Run them in order from the directory containing your media:
|
||||
|
||||
```sh
|
||||
bash 'convert-()-to-[].bash'
|
||||
bash 'move-[id]-to-end.bash'
|
||||
bash create-json-alongside.bash
|
||||
bash insert-id-into-json.bash
|
||||
bash insert-title-into-json.bash
|
||||
bash insert-date-into-json.bash
|
||||
```
|
||||
|
||||
Each script performs a single transformation so you can inspect results between steps.
|
||||
|
||||
---
|
||||
|
||||
## Scripts (order and purpose)
|
||||
1. `convert-()-to-[].bash`
|
||||
- Replace parentheses containing an ID with square brackets (e.g. `(ID)` -> `[ID]`) and clean spacing.
|
||||
|
||||
2. `move-[id]-to-end.bash`
|
||||
- Ensure the video ID appears at the end of the filename inside square brackets.
|
||||
|
||||
3. `create-json-alongside.bash`
|
||||
- Create a `.info.json` sidecar for each video file by copying the `Example.info.json` template.
|
||||
|
||||
4. `insert-id-into-json.bash`
|
||||
- Populate the sidecar JSON with the video ID field.
|
||||
|
||||
5. `insert-title-into-json.bash`
|
||||
- Insert the title (the filename without the `.info.json` extension and the trailing `[ID]`) into the sidecar JSON.
|
||||
|
||||
6. `insert-date-into-json.bash`
|
||||
- Insert the upload date (the leading `YYYYMMDD` of the filename, if present) into the sidecar JSON.
|
||||
|
||||
---
|
||||
|
||||
## Notes and tips
|
||||
- Scripts do not process subdirectories. Run at the directory root for each archive.
|
||||
- Always test on a copy or run a subset first to confirm behavior.
|
||||
- If filenames contain unusual characters, run a quick grep for non-ASCII prior to processing.
|
||||
- Modify scripts to add dry-run mode if you want safer previews.
|
||||
|
||||
---
|
||||
|
||||
## Example archive
|
||||
Archive used for testing:
|
||||
`https://archive.org/details/TempleOS-TheMissingVideos`
|
||||
|
||||
Processed example (after running full pipeline):
|
||||
`20170311 Terry A Davis Live Stream [5XtCZ1Fa9ag].mp4`
|
||||
`20170311 Terry A Davis Live Stream [5XtCZ1Fa9ag].info.json`
|
||||
|
||||
---
|
||||
|
||||
21
create-json-alongside.bash
Normal file
21
create-json-alongside.bash
Normal file
@ -0,0 +1,21 @@
|
||||
#!/bin/bash
#
# create-json-alongside.bash
#
# Creates a "<video name>.info.json" sidecar next to every video file
# (*.mp4, *.mkv, *.mov, *.avi) in the current directory by copying the
# template file Example.info.json, which must be in the current directory
# too. Subdirectories are not visited.
#
# Existing sidecars are left untouched, so the script can be re-run after
# new videos were added without wiping metadata that was already inserted.
#
# Exit status: 0 on success, 1 if the template is missing or a copy failed.

template="Example.info.json"

# Copy the template next to each video file of the current directory.
# Returns 1 when the template is missing or any copy failed, 0 otherwise.
main() {
  local f base target status=0

  # Verify template exists
  if [ ! -f "$template" ]; then
    echo "Error: $template not found." >&2
    return 1
  fi

  for f in *.mp4 *.mkv *.mov *.avi; do
    # A glob without matches stays literal; this also skips directories.
    [ -f "$f" ] || continue

    base="${f%.*}"              # Remove extension
    target="${base}.info.json"  # Sidecar name

    # Never clobber a sidecar that may already hold inserted metadata.
    if [ -e "$target" ]; then
      echo "Skipped (already exists): $target"
      continue
    fi

    if cp -- "$template" "$target"; then
      echo "Created: $target"
    else
      echo "Error: could not create $target" >&2
      status=1
    fi
  done

  return "$status"
}

# Must stay the last command: its return value is the script's exit status.
main "$@"
|
||||
29
insert-date-into-json.bash
Normal file
29
insert-date-into-json.bash
Normal file
@ -0,0 +1,29 @@
|
||||
#!/bin/bash
#
# insert-date-into-json.bash
#
# For every "*.info.json" sidecar in the current directory whose filename
# starts with an eight digit date (YYYYMMDD), stores that date in the
# sidecar's "upload_date" field. Sidecars without a leading date are skipped.
#
# jq is used when it is installed. Otherwise a sed fallback rewrites a simple
# one-line  "upload_date": "..."  pair (whatever its current value is).
#
# Exit status: 0 when every dated sidecar was updated, 1 otherwise.

# Print the eight leading digits of NAME ($1).
# Returns 1 (printing nothing) when NAME does not start with eight digits.
extract_upload_date() {
  local name=$1
  if [[ "$name" =~ ^([0-9]{8}) ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    return 1
  fi
}

# Print FILE ($1) with .upload_date set to DATE ($2), using jq.
render_with_jq() {
  jq --arg date "$2" '.upload_date = $date' < "$1"
}

# Print FILE ($1) with the "upload_date" string value replaced by DATE ($2),
# using sed. DATE consists of digits only, so it needs no escaping.
render_with_sed() {
  sed -E 's/"upload_date": *"(\\.|[^"\\])*"/"upload_date": "'"$2"'"/' < "$1"
}

# Update every dated sidecar of the current directory.
main() {
  local f upload_date tmpfile renderer note status=0

  if command -v jq >/dev/null 2>&1; then
    renderer=render_with_jq
    note=""
  else
    renderer=render_with_sed
    note=" (using sed)"
  fi

  for f in *.info.json; do
    [ -f "$f" ] || continue

    if ! upload_date=$(extract_upload_date "$f"); then
      echo "No date found in $f, skipping."
      continue
    fi

    tmpfile=$(mktemp) || return 1
    # The result is written back through the existing file instead of moving
    # the temp file over it: that keeps the sidecar's permissions and owner
    # (a mktemp file is private to the current user), and the sidecar is only
    # touched after the renderer succeeded.
    if "$renderer" "$f" "$upload_date" > "$tmpfile" && cat "$tmpfile" > "$f"; then
      echo "Updated upload_date in $f: $upload_date$note"
    else
      echo "Error: could not update upload_date in $f" >&2
      status=1
    fi
    rm -f -- "$tmpfile"
  done

  return "$status"
}

# Must stay the last command: its return value is the script's exit status.
main "$@"
|
||||
23
insert-id-into-json.bash
Normal file
23
insert-id-into-json.bash
Normal file
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
#
# insert-id-into-json.bash
#
# For every "*.info.json" sidecar in the current directory, takes the video
# ID from the "[ID]" that directly precedes ".info.json" in the filename and
# stores it in the sidecar's "id" field. Sidecars without such an ID are
# skipped silently.
#
# jq is used when it is installed. Otherwise a sed fallback rewrites a simple
# one-line  "id": "..."  pair (whatever its current value is).
#
# Exit status: 0 when every sidecar with an ID was updated, 1 otherwise.

# Print the ID of a NAME ($1) that ends in "[ID].info.json".
# Prints nothing when NAME has no such ending.
extract_id() {
  local name=$1
  # Excluding "[" as well as "]" from the ID selects the last bracket group.
  local re='\[([^][]*)\]\.info\.json$'
  if [[ $name =~ $re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  fi
}

# Print VALUE ($1) escaped twice: first as the inside of a JSON string
# (backslash and double quote), then for the replacement part of a sed
# "s/.../.../" command (backslash, "/" and "&").
sed_json_replacement() {
  printf '%s' "$1" | sed -e 's/[\\"]/\\&/g' -e 's/[\\/&]/\\&/g'
}

# Print FILE ($1) with .id set to ID ($2), using jq.
render_with_jq() {
  jq --arg newid "$2" '.id = $newid' < "$1"
}

# Print FILE ($1) with the "id" string value replaced by ID ($2), using sed.
# The leading quote in the pattern keeps keys such as "channel_id" untouched.
render_with_sed() {
  local esc
  esc=$(sed_json_replacement "$2") || return 1
  sed -E 's/"id": *"(\\.|[^"\\])*"/"id": "'"$esc"'"/' < "$1"
}

# Update every sidecar of the current directory that carries an ID.
main() {
  local f id tmpfile renderer note status=0

  if command -v jq >/dev/null 2>&1; then
    renderer=render_with_jq
    note=""
  else
    renderer=render_with_sed
    note=" (using sed)"
  fi

  # Loop over all .info.json files in the current directory
  for f in *.info.json; do
    [ -f "$f" ] || continue   # also skips the literal, unmatched glob

    id=$(extract_id "$f")
    [ -n "$id" ] || continue  # no ID in the filename

    tmpfile=$(mktemp) || return 1
    # The result is written back through the existing file instead of moving
    # the temp file over it: that keeps the sidecar's permissions and owner
    # (a mktemp file is private to the current user), and the sidecar is only
    # touched after the renderer succeeded.
    if "$renderer" "$f" "$id" > "$tmpfile" && cat "$tmpfile" > "$f"; then
      echo "Updated $f with id: $id$note"
    else
      echo "Error: could not update $f" >&2
      status=1
    fi
    rm -f -- "$tmpfile"
  done

  return "$status"
}

# Must stay the last command: its return value is the script's exit status.
main "$@"
|
||||
20
insert-title-into-json.bash
Normal file
20
insert-title-into-json.bash
Normal file
@ -0,0 +1,20 @@
|
||||
#!/bin/bash
#
# insert-title-into-json.bash
#
# For every "*.info.json" sidecar in the current directory, stores the title
# derived from the filename in the sidecar's "title" field. The title is the
# filename without ".info.json" and without a trailing " [ID]".
#
# jq is used when it is installed. Otherwise a sed fallback rewrites a simple
# one-line  "title": "..."  pair. The fallback replaces whatever value is
# present (a pattern that only accepts an empty value would never match a
# sidecar whose title is already filled in), and the title is escaped so that
# characters such as  &  \  "  end up in the JSON unchanged.
#
# Exit status: 0 when every sidecar was updated, 1 otherwise.

# Print the title for sidecar filename NAME ($1).
title_from_filename() {
  local title=$1
  title="${title%.info.json}"  # Remove extension
  title="${title% \[*\]}"      # Remove trailing [ID]
  printf '%s\n' "$title"
}

# Print VALUE ($1) escaped twice: first as the inside of a JSON string
# (backslash and double quote), then for the replacement part of a sed
# "s/.../.../" command (backslash, "/" and "&").
sed_json_replacement() {
  printf '%s' "$1" | sed -e 's/[\\"]/\\&/g' -e 's/[\\/&]/\\&/g'
}

# Print FILE ($1) with .title set to TITLE ($2), using jq.
render_with_jq() {
  jq --arg newtitle "$2" '.title = $newtitle' < "$1"
}

# Print FILE ($1) with the "title" string value replaced by TITLE ($2),
# using sed.
render_with_sed() {
  local esc
  esc=$(sed_json_replacement "$2") || return 1
  sed -E 's/"title": *"(\\.|[^"\\])*"/"title": "'"$esc"'"/' < "$1"
}

# Update every sidecar of the current directory.
main() {
  local f title tmpfile renderer note status=0

  if command -v jq >/dev/null 2>&1; then
    renderer=render_with_jq
    note=""
  else
    renderer=render_with_sed
    note=" (using sed)"
  fi

  for f in *.info.json; do
    [ -f "$f" ] || continue

    title=$(title_from_filename "$f")

    tmpfile=$(mktemp) || return 1
    # The result is written back through the existing file instead of moving
    # the temp file over it: that keeps the sidecar's permissions and owner
    # (a mktemp file is private to the current user), and the sidecar is only
    # touched after the renderer succeeded.
    if "$renderer" "$f" "$title" > "$tmpfile" && cat "$tmpfile" > "$f"; then
      echo "Updated title in $f: $title$note"
    else
      echo "Error: could not update title in $f" >&2
      status=1
    fi
    rm -f -- "$tmpfile"
  done

  return "$status"
}

# Must stay the last command: its return value is the script's exit status.
main "$@"
|
||||
28
move-[id]-to-end.bash
Normal file
28
move-[id]-to-end.bash
Normal file
@ -0,0 +1,28 @@
|
||||
#!/bin/bash
#
# move-[id]-to-end.bash
#
# Renames every regular file in the current directory whose name contains an
# ID in () or [] so that the ID ends up, in square brackets, right before the
# extension:
#
#   "20170311 [5XtCZ1Fa9ag] Terry A Davis Live Stream.mp4"
#     -> "20170311 Terry A Davis Live Stream [5XtCZ1Fa9ag].mp4"
#
# Rules:
#   * The last bracket group of the name is taken as the ID; only that group
#     is moved, other groups such as "(Part 1)" stay where they are.
#   * ".info.json" is treated as one extension, so sidecars keep their name
#     shape.
#   * Names that are already normalized are left alone (re-running is safe).
#   * The script never renames itself and never overwrites an existing file.
#
# Exit status: 0 on success, 1 if a rename failed or was refused.

# Print the normalized form of filename NAME ($1).
# Returns 1 (printing nothing) when NAME holds no non-empty bracket group.
normalized_name() {
  local original=$1
  # Greedy first group => the match is the LAST "(...)" / "[...]" in the name.
  local group_re='^(.*)[([]([^])]*)[])](.*)$'
  local id rest stem ext="" has_ext=0 result

  [[ $original =~ $group_re ]] || return 1
  id=${BASH_REMATCH[2]}
  [[ -n $id ]] || return 1
  rest="${BASH_REMATCH[1]}${BASH_REMATCH[3]}"

  # Separate name and extension. Text after the last dot only counts as an
  # extension when it contains no space ("Mr. Smith" has no extension).
  if [[ $rest == *.info.json ]]; then
    stem=${rest%.info.json}
    ext="info.json"
    has_ext=1
  elif [[ $rest == *.* && ${rest##*.} != *" "* ]]; then
    stem=${rest%.*}
    ext=${rest##*.}
    has_ext=1
  else
    stem=$rest
  fi

  # Clean up the spaces left behind by the removed group: collapse runs of
  # spaces, then trim both ends. Trimming before the extension is re-attached
  # is what keeps an already normalized name unchanged.
  while [[ $stem == *"  "* ]]; do
    stem=${stem//"  "/" "}
  done
  stem=${stem#"${stem%%[! ]*}"}
  stem=${stem%"${stem##*[! ]}"}

  # Rebuild new name (handle files with and without extensions)
  if [[ -n $stem ]]; then
    result="$stem [$id]"
  else
    result="[$id]"
  fi
  if (( has_ext )); then
    result+=".$ext"
  fi

  printf '%s\n' "$result"
}

# Rename every file of the current directory to its normalized name.
main() {
  local self=${BASH_SOURCE[0]##*/}
  local f newname status=0

  for f in *; do
    # Skip directories
    [ -f "$f" ] || continue
    # This script's own name contains "[id]"; it must not rename itself.
    [ "$f" != "$self" ] || continue

    # If there's no ID, skip
    newname=$(normalized_name "$f") || continue

    # Only rename if different
    [ "$f" != "$newname" ] || continue

    if [ -e "$newname" ]; then
      echo "Skipping $f: $newname already exists" >&2
      status=1
      continue
    fi

    mv -- "$f" "$newname" || status=1
  done

  return "$status"
}

# Must stay the last command: its return value is the script's exit status.
main "$@"
|
||||
Loading…
x
Reference in New Issue
Block a user