import csv
import os
import sys
import tempfile
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from bs4 import BeautifulSoup
from flask import Flask, request, render_template_string, send_file
# ------------------ Common Functions ------------------ #
# Request headers passed to requests.get() by fetch_text; the User-Agent
# string identifies this tool to the servers it contacts.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (compatible; BacklinkChecker/1.0; "
        "+https://example.com/bot)"
    ),
}
def fetch_text(url, timeout=12):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``.

    Returns:
        The response body as a string, or ``None`` when the request fails
        or the server answers with an HTTP error status.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        # Turn 4xx/5xx answers into an exception so they count as failures.
        response.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP level failures are expected here; anything else
        # is a programming error and should not be hidden behind ``None``.
        return None
    return response.text
def check_backlink(source_url, target_url):
    """Report whether the page at *source_url* references *target_url*.

    The page is fetched with ``fetch_text``. The target is first searched as
    a plain substring of the page text; if that fails, every ``<a href>`` is
    inspected and accepted when it contains the target or equals it once
    trailing slashes are ignored.

    Args:
        source_url: Page to download and inspect.
        target_url: URL to look for.

    Returns:
        A dict with ``"source"`` and ``"found"``. When found it also carries
        ``"method"`` (``"substring"`` or ``"anchor"``) and, for anchors,
        ``"href"``. On failure it carries ``"error"``: ``"empty-target"``,
        ``"fetch-failed"`` or ``"parse-failed: ..."``.
    """
    if not target_url:
        # "" is a substring of every string, so an empty target would be
        # reported as found on every page; reject it before fetching.
        return {"source": source_url, "found": False, "error": "empty-target"}

    html = fetch_text(source_url)
    if html is None:
        return {"source": source_url, "found": False, "error": "fetch-failed"}

    if target_url in html:
        return {"source": source_url, "found": True, "method": "substring"}

    wanted = target_url.rstrip("/")
    try:
        soup = BeautifulSoup(html, "html.parser")
        for anchor in soup.find_all("a", href=True):
            href = anchor["href"]
            if target_url in href or href.rstrip("/") == wanted:
                return {"source": source_url, "found": True, "method": "anchor", "href": href}
    except Exception as exc:
        # Parsing stays best-effort, but the failure is reported instead of
        # being indistinguishable from "link not present".
        return {"source": source_url, "found": False, "error": f"parse-failed: {exc}"}
    return {"source": source_url, "found": False}
def run_check(sources, target, workers=12):
    """Run ``check_backlink`` for every source URL on a thread pool.

    Args:
        sources: Iterable of source page URLs.
        target: URL to look for on each source page.
        workers: ``max_workers`` given to the ``ThreadPoolExecutor``.

    Returns:
        A list with one result dict per source, appended in the order the
        checks complete. A check that raises is recorded as
        ``{"source": ..., "found": False, "error": str(exception)}``.
    """
    collected = []
    with ThreadPoolExecutor(max_workers=workers) as pool:
        # Remember which source each future belongs to so a crashed check
        # can still be attributed to its URL.
        origin_of = {}
        for src in sources:
            origin_of[pool.submit(check_backlink, src, target)] = src

        for finished in as_completed(origin_of):
            try:
                outcome = finished.result()
            except Exception as exc:
                outcome = {"source": origin_of[finished], "found": False, "error": str(exc)}
            collected.append(outcome)
    return collected
# ------------------ CLI Mode ------------------ #
def read_sources_from_csv(path):
    """Read source URLs from the first column of a CSV file.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A list of the stripped first-column values, in file order. Empty
        rows and rows whose first cell is blank are skipped.
    """
    # "utf-8-sig" reads plain UTF-8 too, but drops a leading byte-order mark
    # so it does not end up glued to the first URL.
    with open(path, newline="", encoding="utf-8-sig") as f:
        first_cells = (row[0].strip() for row in csv.reader(f) if row)
        return [url for url in first_cells if url]
def write_results_csv(path, results):
    """Write backlink check results to a CSV file at *path*.

    The file gets a header row followed by one row per result dict with the
    columns source, found, method (blank when absent) and the ``href``
    value, falling back to ``error`` (blank when neither is present).

    Args:
        path: Destination file; it is created or overwritten.
        results: Iterable of result dicts.
    """
    header = ["source_url", "found", "method", "href_or_error"]
    with open(path, "w", newline="", encoding="utf-8") as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        sheet.writerows(
            [
                entry.get("source"),
                entry.get("found"),
                entry.get("method", ""),
                entry.get("href") or entry.get("error", ""),
            ]
            for entry in results
        )
def cli_mode(args):
    """Run a backlink check from the command line.

    Args:
        args: Argument vector shaped like ``sys.argv``: program name,
            sources CSV path, target URL, output CSV path.

    Prints a usage line and exits with status 1 when fewer than four items
    are given. Otherwise it reads the sources, checks them, writes the
    result CSV and prints progress plus the elapsed time.
    """
    if len(args) < 4:
        print("Usage: python backlink_tool.py sources.csv target_url output.csv")
        sys.exit(1)

    sources_csv, raw_target, out_csv = args[1:4]
    target = raw_target.strip()

    sources = read_sources_from_csv(sources_csv)
    print(f"Loaded {len(sources)} sources. Checking for target: {target}")

    # The timed span covers both the checks and writing the output file.
    start = time.time()
    write_results_csv(out_csv, run_check(sources, target))
    elapsed = time.time() - start
    print(f"Done. Results written to {out_csv}. Time: {elapsed:.1f}s")
# ------------------ Flask Web Mode ------------------ #
# Flask application object; the @app.route decorators further down register
# their view functions on it, and web_mode() starts it.
app = Flask(__name__)
# Page template rendered by index(). The upload form posts the fields that
# index() reads ("target" text field and "sources" file field); the results
# table and download link only appear once a check has produced results.
TEMPLATE = """
<!doctype html>
<html>
<head>
  <meta charset="utf-8">
  <title>Backlink Checker</title>
</head>
<body>
  <h1>Backlink Checker Tool</h1>
  <form method="post" enctype="multipart/form-data">
    <p><label>Target URL: <input type="text" name="target" required></label></p>
    <p><label>Sources file (one URL per line): <input type="file" name="sources" required></label></p>
    <p><button type="submit">Check</button></p>
  </form>
  {% if results %}
  <h2>Results ({{results|length}})</h2>
  <table border="1" cellpadding="4">
    <tr><th>Source</th><th>Found</th><th>Method</th><th>Info</th></tr>
    {% for r in results %}
    <tr>
      <td>{{r.source}}</td>
      <td>{{r.found}}</td>
      <td>{{r.method or ""}}</td>
      <td>{{r.href or r.error or ""}}</td>
    </tr>
    {% endfor %}
  </table>
  {% if download_link %}
  <p><a href="{{download_link}}">Download CSV</a></p>
  {% endif %}
  {% endif %}
</body>
</html>
"""
@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the checker page and, on POST, run a check on the uploaded list.

    A POST must carry a non-empty ``target`` form field and a ``sources``
    file upload with one URL per line (UTF-8). The results are rendered into
    the page and also written to a temporary CSV file whose basename is
    registered in ``app.config["tmp_files"]`` for the download route.

    Returns:
        The rendered page, or a ``(message, 400)`` tuple when the target or
        file is missing or the file is not valid UTF-8.
    """
    results = None
    download_link = None
    if request.method == "POST":
        # .get() yields None when the field is absent; normalise to "" so a
        # missing target produces the 400 below instead of an AttributeError.
        target = (request.form.get("target") or "").strip()
        file = request.files.get("sources")
        if not file or not target:
            return "Missing file or target URL", 400

        try:
            # "utf-8-sig" also reads plain UTF-8 and drops a leading BOM.
            text = file.read().decode("utf-8-sig")
        except UnicodeDecodeError:
            return "Sources file must be UTF-8 text", 400
        sources = [line.strip() for line in text.splitlines() if line.strip()]
        results = run_check(sources, target)

        # Reserve a uniquely named file and close the handle right away;
        # NamedTemporaryFile opens in binary mode by default, which
        # csv.writer cannot write to, so the shared CSV helper reopens the
        # path in text mode.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            csv_path = tmp.name
        write_results_csv(csv_path, results)

        name = os.path.basename(csv_path)
        # NOTE(review): registry entries and temp files are never removed;
        # a long-running server accumulates them.
        app.config.setdefault("tmp_files", {})[name] = csv_path
        download_link = f"/download/{name}"
    return render_template_string(TEMPLATE, results=results, download_link=download_link)
@app.route("/download/<name>")
def download(name):
    """Send a previously generated results CSV as a file download.

    Args:
        name: Last URL segment; it must be a key registered in
            ``app.config["tmp_files"]`` by the index view.

    Returns:
        The file as an attachment named ``backlink_results.csv``, or a
        ``("File not found", 404)`` tuple for an unknown name.
    """
    # The path always comes from the registry, never from the URL itself,
    # so only files created by index() can be served.
    path = app.config.get("tmp_files", {}).get(name)
    if not path:
        return "File not found", 404
    return send_file(path, as_attachment=True, download_name="backlink_results.csv")
def web_mode():
    """Announce the local URL and start the Flask server.

    The server is started on host ``0.0.0.0``, port 5000, with debug off.
    """
    bind_host, bind_port = "0.0.0.0", 5000
    print("Starting Flask web interface at http://localhost:5000")
    app.run(host=bind_host, port=bind_port, debug=False)
# ------------------ Entry Point ------------------ #
if __name__ == "__main__":
    # With no command-line arguments start the web interface; otherwise
    # hand the full argument vector to the CLI.
    if len(sys.argv) <= 1:
        web_mode()
    else:
        cli_mode(sys.argv)
# NOTE: stray web-page text ("Comments" / "Post a Comment") followed the script here; it is not part of the program.