Skip to main content

Backlink

"""Backlink checker: report which source pages contain a link to a target URL.

Run with arguments for CLI mode::

    python backlink_tool.py sources.csv target_url output.csv

or with no arguments to start the Flask web interface.
"""

import csv
import os
import sys
import tempfile
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template_string, request, send_file

# ------------------ Common Functions ------------------ #

HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; BacklinkChecker/1.0; +https://example.com/bot)"
}

# Column names shared by every results CSV this tool writes.
CSV_HEADER = ["source_url", "found", "method", "href_or_error"]


def fetch_text(url, timeout=12):
    """Fetch *url* and return the response body as text.

    Args:
        url: Page address to download.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The response text, or ``None`` if the request failed or the server
        answered with an HTTP error status.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        return None
    return response.text


def check_backlink(source_url, target_url):
    """Check whether the page at *source_url* references *target_url*.

    The raw HTML is searched for the target as a plain substring first; if
    that fails, every ``<a href>`` is compared against the target, ignoring a
    trailing slash on either side.

    Args:
        source_url: Page to download and inspect.
        target_url: URL that is expected to be linked from the page.

    Returns:
        A dict with ``source`` and ``found`` keys, plus ``method`` (and
        ``href`` for anchor matches) on success or ``error`` on failure.
    """
    if not target_url:
        # An empty string is a substring of every page, which would report
        # every source as a match.
        return {"source": source_url, "found": False, "error": "empty-target"}

    html = fetch_text(source_url)
    if html is None:
        return {"source": source_url, "found": False, "error": "fetch-failed"}

    if target_url in html:
        return {"source": source_url, "found": True, "method": "substring"}

    try:
        soup = BeautifulSoup(html, "html.parser")
        anchors = soup.find_all("a", href=True)
    except Exception as exc:
        # Parsing is best-effort, but the failure is reported rather than
        # being indistinguishable from "no link found".
        return {"source": source_url, "found": False, "error": f"parse-failed: {exc}"}

    wanted = target_url.rstrip("/")
    for anchor in anchors:
        href = anchor["href"]
        if target_url in href or href.rstrip("/") == wanted:
            return {"source": source_url, "found": True, "method": "anchor", "href": href}

    return {"source": source_url, "found": False}


def run_check(sources, target, workers=12):
    """Run :func:`check_backlink` for every source URL on a thread pool.

    Args:
        sources: Iterable of source page URLs.
        target: Target URL to look for on each page.
        workers: Maximum number of worker threads.

    Returns:
        A list of result dicts in completion order (not input order). A check
        that raises is recorded as ``found: False`` with the exception text
        under ``error``.
    """
    results = []
    with ThreadPoolExecutor(max_workers=workers) as ex:
        futures = {ex.submit(check_backlink, s, target): s for s in sources}
        for fut in as_completed(futures):
            try:
                res = fut.result()
            except Exception as e:
                res = {"source": futures[fut], "found": False, "error": str(e)}
            results.append(res)
    return results


# ------------------ CLI Mode ------------------ #

def read_sources_from_csv(path):
    """Read source URLs from the first column of the CSV file at *path*.

    Empty rows and rows whose first cell is blank are skipped. The file is
    decoded as UTF-8, tolerating a leading byte-order mark.
    """
    urls = []
    with open(path, newline="", encoding="utf-8-sig") as f:
        for row in csv.reader(f):
            if not row:
                continue
            url = row[0].strip()
            if url:
                urls.append(url)
    return urls


def _result_row(result):
    """Flatten one result dict into the column order of ``CSV_HEADER``."""
    return [
        result.get("source"),
        result.get("found"),
        result.get("method", ""),
        result.get("href") or result.get("error", ""),
    ]


def write_results_csv(path, results):
    """Write *results* to *path* as UTF-8 CSV with a header row."""
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        writer.writerows(_result_row(r) for r in results)


def cli_mode(args):
    """Run a check from the command line.

    Args:
        args: ``sys.argv``-style list: script name, sources CSV path, target
            URL, output CSV path. Prints usage and exits with status 1 when
            fewer than four items are given.
    """
    if len(args) < 4:
        print("Usage: python backlink_tool.py sources.csv target_url output.csv")
        sys.exit(1)

    sources_csv = args[1]
    target = args[2].strip()
    out_csv = args[3]

    sources = read_sources_from_csv(sources_csv)
    print(f"Loaded {len(sources)} sources. Checking for target: {target}")

    start = time.time()
    results = run_check(sources, target)
    write_results_csv(out_csv, results)
    elapsed = time.time() - start

    print(f"Done. Results written to {out_csv}. Time: {elapsed:.1f}s")


# ------------------ Flask Web Mode ------------------ #

app = Flask(__name__)

# NOTE(review): the HTML markup of this template was missing from the source
# and has been rebuilt around the surviving text and Jinja tags. The form field
# names match what index() reads ("target", "sources"); the label wording is a
# reconstruction - confirm against the intended page design.
TEMPLATE = """<!doctype html>
<html>
<head>
  <meta charset="utf-8">
  <title>Backlink Checker</title>
</head>
<body>
  <h1>Backlink Checker Tool</h1>
  <form method="post" enctype="multipart/form-data">
    <p><label>Target URL: <input type="text" name="target" required></label></p>
    <p><label>Sources file (one URL per line): <input type="file" name="sources" required></label></p>
    <p><button type="submit">Check</button></p>
  </form>
  {% if results %}
  <h2>Results ({{results|length}})</h2>
  <table border="1">
    <tr><th>Source</th><th>Found</th><th>Method</th><th>Info</th></tr>
    {% for r in results %}
    <tr>
      <td>{{r.source}}</td>
      <td>{{r.found}}</td>
      <td>{{r.method or ""}}</td>
      <td>{{r.href or r.error or ""}}</td>
    </tr>
    {% endfor %}
  </table>
  <p><a href="{{download_link}}">Download CSV</a></p>
  {% endif %}
</body>
</html>
"""


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the upload form and, on POST, run the check and show results.

    The POST body must carry a ``target`` form field and a ``sources`` file
    upload containing one URL per line. Results are also written to a
    temporary CSV that is registered for the download route.
    """
    results = None
    download_link = None

    if request.method == "POST":
        # A missing field yields None; treat it like an empty value so the
        # request is rejected with 400 instead of failing on .strip().
        target = (request.form.get("target") or "").strip()
        upload = request.files.get("sources")
        if not upload or not target:
            return "Missing file or target URL", 400

        try:
            text = upload.read().decode("utf-8-sig")
        except UnicodeDecodeError:
            return "Sources file must be UTF-8 text", 400

        sources = [line.strip() for line in text.splitlines() if line.strip()]
        results = run_check(sources, target)

        # mkstemp only reserves the path; the CSV is then written through the
        # same text-mode writer the CLI uses.
        fd, tmp_path = tempfile.mkstemp(suffix=".csv")
        os.close(fd)
        write_results_csv(tmp_path, results)

        # NOTE: registered files are never removed, so they accumulate for
        # the lifetime of the process.
        name = os.path.basename(tmp_path)
        app.config.setdefault("tmp_files", {})[name] = tmp_path
        download_link = f"/download/{name}"

    return render_template_string(TEMPLATE, results=results, download_link=download_link)


@app.route("/download/<name>")
def download(name):
    """Send a previously generated results CSV, looked up by its file name.

    Only names registered by ``index`` are served; anything else gets a 404.
    """
    path = app.config.get("tmp_files", {}).get(name)
    if not path:
        return "File not found", 404
    return send_file(path, as_attachment=True, download_name="backlink_results.csv")


def web_mode():
    """Start the Flask development server on port 5000."""
    print("Starting Flask web interface at http://localhost:5000")
    # NOTE(review): host 0.0.0.0 listens on every network interface, not only
    # localhost as the message above suggests - confirm this is intended.
    app.run(host="0.0.0.0", port=5000, debug=False)


# ------------------ Entry Point ------------------ #

if __name__ == "__main__":
    # CLI Mode if arguments provided, else start Web Mode
    if len(sys.argv) > 1:
        cli_mode(sys.argv)
    else:
        web_mode()

Comments