<?php
/**
 * get_news.php
 * Usage:
 *   get_news.php?page=200
 * It will fetch the page, save 90 items, then redirect to page=199 ... until page=1
 */

/* =========================
   CONFIG
   ========================= */
// Database connection settings; passed to DB::connect() in the MAIN section.
$dbHost = 'localhost';
$dbUser = 'root';
$dbPass = '';
$dbName = 'alasemah1';

$defaultStartPage = 200;      // page used when no ?page= parameter is sent in the URL
$minPage = 1;                 // lowest page number; the redirect chain stops once it is processed
$maxPage = 200;              // simple safeguard: requested page numbers above this are clamped down
$redirectDelaySeconds = 1;    // short pause between pages to ease the load on the remote site

/* =========================
   DB CLASS (minimal, but sufficient)
   ========================= */
/**
 * Thin wrapper around a procedural mysqli connection.
 *
 * Offers only what this script needs: open a connection, run a statement,
 * and quote a value for inclusion in an SQL string.
 */
class DB {
    /** @var mysqli|null Active connection; stays null until connect() succeeds. */
    private $dbConnect;

    /**
     * Open the connection and switch it to utf8mb4.
     *
     * @param string $db     Database name.
     * @param string $server Host name.
     * @param string $user   User name.
     * @param string $pass   Password.
     * @throws Exception When the connection or the charset switch fails.
     *                   (Depending on mysqli's report mode, mysqli may also
     *                   throw its own mysqli_sql_exception.)
     */
    public function connect($db, $server = "localhost", $user = "root", $pass = "") {
        $link = mysqli_connect(trim($server), trim($user), trim($pass), trim($db));
        if (!$link) {
            throw new Exception("DB connect error: " . mysqli_connect_error());
        }

        // mysqli_real_escape_string() escapes relative to the connection
        // charset, so a failed charset switch must not be ignored.
        if (!mysqli_set_charset($link, 'utf8mb4')) {
            $reason = mysqli_error($link);
            mysqli_close($link);
            throw new Exception("DB charset error: " . $reason);
        }

        $this->dbConnect = $link;
    }

    /**
     * Run one SQL statement.
     *
     * @param string $query SQL text.
     * @return bool Always true; failures are reported by exception.
     * @throws Exception When not connected or when the statement fails.
     */
    public function dbquery($query) {
        $result = mysqli_query($this->requireConnection(), $query);
        if (!$result) {
            throw new Exception("SQL error: " . mysqli_error($this->dbConnect) . "\nSQL: " . $query);
        }

        // Statements that produce rows return a mysqli_result; this method
        // never hands it out, so release it right away.
        if ($result instanceof mysqli_result) {
            mysqli_free_result($result);
        }
        return true;
    }

    /**
     * Escape a value and wrap it in quotes for use inside an SQL string.
     *
     * Note: this performs SQL escaping only; it does not convert quotes to
     * HTML entities.
     *
     * @param mixed  $value Value to embed; cast to string first.
     * @param string $quote Quote character placed on both sides.
     * @return string Quoted, escaped literal.
     * @throws Exception When not connected.
     */
    public function sqlSafe($value, $quote = "'") {
        $escaped = mysqli_real_escape_string($this->requireConnection(), (string)$value);
        return $quote . $escaped . $quote;
    }

    /**
     * Return the live connection or fail with a catchable Exception.
     *
     * Without this guard, using the object before connect() raises a
     * TypeError, which `catch (Exception ...)` blocks do not see.
     */
    private function requireConnection() {
        if (!$this->dbConnect) {
            throw new Exception("DB error: not connected");
        }
        return $this->dbConnect;
    }
}

/* =========================
   HELPERS
   ========================= */
/**
 * Download a URL with cURL.
 *
 * @param string $url     Address to fetch.
 * @param int    $timeout Total transfer timeout in seconds.
 * @return array Two-element list: [body, null] on success, [null, message] on failure.
 */
function fetchUrl($url, $timeout = 25) {
    $ch = curl_init($url);
    if ($ch === false) {
        return [null, "cURL error: could not initialise handle"];
    }

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 10, // bound the redirect chain instead of relying on the libcurl default
        CURLOPT_TIMEOUT => $timeout,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) PHP Crawler',
    ]);
    $body = curl_exec($ch);
    $err  = curl_error($ch);
    $code = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // Transport-level failure: curl_exec() returns false and curl_error() explains why.
    if ($body === false) {
        return [null, "cURL error: " . ($err ?: "unknown")];
    }

    // Check the status before the body, so an empty 4xx/5xx response is
    // reported as the HTTP error it is rather than as an unknown cURL error.
    if ($code >= 400) {
        return [null, "HTTP error code: $code"];
    }

    // The caller tests the body for truthiness, so a falsy body ("" or "0")
    // is still reported as a failure, now with an accurate message.
    if (!$body) {
        return [null, "Empty response body (HTTP $code)"];
    }

    return [$body, null];
}

/**
 * Return the first path segment of a URL (e.g. "economy" for
 * ".../economy/2365896/").
 *
 * @param string $url Absolute or relative URL.
 * @return string|null The first segment (an empty string when the path is
 *                     just "/"), or null when the URL has no usable path.
 */
function extractSectionFromUrl($url) {
    $rawPath = parse_url($url, PHP_URL_PATH);
    if (empty($rawPath)) {
        return null;
    }

    // Drop the surrounding slashes, then keep only what precedes the first
    // remaining slash: "/economy/2365896/" -> "economy".
    $trimmed = trim($rawPath, '/');
    return explode('/', $trimmed, 2)[0];
}

/**
 * Turn a resized-thumbnail URL into the URL of the original image by
 * removing a trailing "-<width>x<height>" size suffix from the file name.
 *
 * Only absolute URLs (scheme + host + path) are rewritten; anything else is
 * returned unchanged. Every other URL component (user-info, port, query,
 * fragment) is preserved.
 *
 * @param string|null $imgUrl Image URL as found in the page.
 * @return string|null Rewritten URL, the input unchanged when it is not an
 *                     absolute URL, or null for an empty input.
 */
function originalSanaCdnImage($imgUrl) {
    if (!$imgUrl) return null;

    $parts = parse_url($imgUrl);
    if (empty($parts['scheme']) || empty($parts['host']) || empty($parts['path'])) {
        return $imgUrl;
    }

    // Remove "-123x456" only when it sits directly before a known image extension.
    $newPath = preg_replace('/-\d+x\d+(?=\.(jpg|jpeg|png|webp|gif)$)/i', '', $parts['path']);
    if ($newPath === null) {
        // preg_replace() reports failure with null; fall back to the untouched path.
        $newPath = $parts['path'];
    }

    $rebuilt = $parts['scheme'] . '://';

    if (isset($parts['user'])) {
        $rebuilt .= $parts['user'];
        if (isset($parts['pass'])) $rebuilt .= ':' . $parts['pass'];
        $rebuilt .= '@';
    }

    $rebuilt .= $parts['host'];
    if (isset($parts['port'])) $rebuilt .= ':' . $parts['port'];

    $rebuilt .= $newPath;

    // Compare against '' rather than using empty(), so a query of "0" survives.
    if (isset($parts['query']) && $parts['query'] !== '') $rebuilt .= '?' . $parts['query'];
    if (isset($parts['fragment']) && $parts['fragment'] !== '') $rebuilt .= '#' . $parts['fragment'];

    return $rebuilt;
}

/**
 * Extract news items from one listing page.
 *
 * Looks for `div.p-wrap[data-pid]` elements inside a `div.blog-content`
 * container and reads, for each one, the link (`a.p-url`), the publication
 * time (`time.published`) and the image (`img.featured-img`).
 *
 * @param string $html Raw page markup; treated as UTF-8.
 * @return array List of items, each with the keys pid (int), title, url,
 *               section, image_url and published_at. Items without a numeric
 *               pid, a link, a URL or a title are skipped.
 */
function parseSanaPage($html) {
    $html = (string)$html;
    if (trim($html) === '') {
        // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8).
        return [];
    }

    // Collect libxml parse warnings internally, then restore the caller's
    // setting and empty the buffer so it does not grow from page to page.
    $previousMode = libxml_use_internal_errors(true);

    $dom = new DOMDocument();
    // The processing-instruction prefix makes libxml decode the input as UTF-8
    // instead of depending on a charset <meta> tag inside the page.
    $loaded = $dom->loadHTML('<?xml encoding="UTF-8">' . $html);

    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);

    if (!$loaded) {
        return [];
    }

    $xp = new DOMXPath($dom);

    // Expected structure: .blog-content, then .p-wrap[data-pid] inside it.
    $wraps = $xp->query("//div[contains(@class,'blog-content')]//div[contains(@class,'p-wrap') and @data-pid]");

    $items = [];
    foreach ($wraps as $w) {
        $pid = trim($w->getAttribute('data-pid'));
        // A non-numeric pid would be cast to 0 and then collide with every
        // other such row on the table's unique pid key, so skip it.
        if (!preg_match('/^\d+$/', $pid)) continue;

        $a = $xp->query(".//a[contains(@class,'p-url')][1]", $w)->item(0);
        if (!$a) continue;

        $url   = trim($a->getAttribute('href'));
        // preg_replace() returns null on failure (e.g. invalid UTF-8 with /u);
        // treat that as an empty title so the item is skipped below.
        $title = trim(preg_replace('/\s+/u', ' ', $a->textContent) ?? '');
        if ($url === '' || $title === '') continue;

        $section = extractSectionFromUrl($url);

        $timeNode = $xp->query(".//time[contains(@class,'published')][1]", $w)->item(0);
        $publishedAt = null;
        if ($timeNode) {
            $dt = trim($timeNode->getAttribute('datetime')); // e.g. 2026-01-02T08:32:12.000Z
            if ($dt !== '') {
                // Keep the first 19 characters with "T" replaced: 2026-01-02 08:32:12
                $publishedAt = substr(str_replace('T', ' ', $dt), 0, 19);
            }
        }

        // Image: img.featured-img
        $imgNode = $xp->query(".//img[contains(@class,'featured-img')][1]", $w)->item(0);
        $img = null;
        if ($imgNode) {
            $src = trim($imgNode->getAttribute('src'));
            if ($src !== '') {
                $img = originalSanaCdnImage($src);
            }
        }

        $items[] = [
            'pid' => (int)$pid,
            'title' => $title,
            'url' => $url,
            'section' => $section,
            'image_url' => $img,
            'published_at' => $publishedAt,
        ];
    }

    return $items;
}

/**
 * Create the `news` table when it does not exist yet.
 *
 * The table has unique keys on both `pid` and `url`.
 *
 * @param DB $db Connected database wrapper.
 */
function ensureTable(DB $db) {
    $ddl = "
        CREATE TABLE IF NOT EXISTS news (
          id INT AUTO_INCREMENT PRIMARY KEY,
          pid BIGINT NOT NULL,
          title VARCHAR(700) NOT NULL,
          url VARCHAR(700) NOT NULL,
          section VARCHAR(50) NULL,
          image_url VARCHAR(800) NULL,
          published_at DATETIME NULL,
          created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
          UNIQUE KEY uq_pid (pid),
          UNIQUE KEY uq_url (url)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
    ";

    $db->dbquery($ddl);
}

/**
 * Insert parsed items into the `news` table, one INSERT IGNORE per item.
 *
 * @param DB    $db    Connected database wrapper.
 * @param array $items Items with the keys pid, title, url, section,
 *                     image_url and published_at.
 */
function saveNews(DB $db, array $items) {
    // Quote a value for SQL, or emit the bare NULL keyword when it is empty().
    $quoteOrNull = function ($value) use ($db) {
        return empty($value) ? "NULL" : $db->sqlSafe($value);
    };

    foreach ($items as $row) {
        $pid   = (int)$row['pid'];
        $title = $db->sqlSafe($row['title']);
        $url   = $db->sqlSafe($row['url']);

        // The three optional columns fall back to SQL NULL.
        $section   = $quoteOrNull($row['section'] ?? null);
        $image     = $quoteOrNull($row['image_url'] ?? null);
        $published = $quoteOrNull($row['published_at'] ?? null);

        $db->dbquery("
            INSERT IGNORE INTO `news` (`pid`,`title`,`url`,`section`,`image_url`,`published_at`)
            VALUES ($pid, $title, $url, $section, $image, $published)
        ");
    }
}

/**
 * Send the browser to another URL and stop the script.
 *
 * Uses an HTTP header while headers can still be sent (`Refresh` when a
 * delay is requested, `Location` otherwise) and an HTML meta refresh tag
 * once output has already started.
 *
 * @param string $url     Target URL.
 * @param int    $seconds Delay before the redirect; 0 means immediately.
 */
function redirectTo($url, $seconds = 0) {
    if (headers_sent()) {
        // Too late for headers: fall back to a meta refresh in the page body.
        echo '<meta http-equiv="refresh" content="'.(int)$seconds.';url='.htmlspecialchars($url, ENT_QUOTES, 'UTF-8').'">';
        exit;
    }

    // No output yet, so a plain header is enough.
    header($seconds > 0 ? "Refresh: $seconds; url=$url" : "Location: $url");
    exit;
}

/* =========================
   MAIN
   ========================= */
// Process one listing page per request: clamp the requested page number,
// fetch and parse the page, store its items, then redirect to page - 1
// until $minPage has been handled.
try {
    $page = isset($_GET['page']) ? (int)$_GET['page'] : (int)$defaultStartPage;

    if ($page < $minPage) $page = $minPage;
    if ($page > $maxPage) $page = $maxPage;

    $db = new DB();
    $db->connect($dbName, $dbHost, $dbUser, $dbPass);
    ensureTable($db);

    $pageUrl = "https://sana.sy/syria-news/page/$page/";
    [$html, $err] = fetchUrl($pageUrl);

    header('Content-Type: text/html; charset=utf-8');

    echo "<h3>Processing page: $page</h3>";
    echo "<div>URL: " . htmlspecialchars($pageUrl, ENT_QUOTES, 'UTF-8') . "</div>";

    // The chain always walks downwards, whether or not this page succeeded.
    $next = $page - 1;
    $nextUrl = "get_news.php?page=$next";

    if (!$html) {
        echo "<div style='color:red'>Fetch failed: " . htmlspecialchars((string)$err, ENT_QUOTES, 'UTF-8') . "</div>";

        // Even after a failure, move on to the next page so the chain does not stall.
        if ($next >= $minPage) {
            echo "<div>Redirecting to: $nextUrl</div>";
            redirectTo($nextUrl, $redirectDelaySeconds);
        } else {
            echo "<div>Done. Reached page $minPage.</div>";
        }
        exit;
    }

    $items = parseSanaPage($html);
    echo "<div>Parsed: " . count($items) . " items</div>";

    saveNews($db, $items);
    echo "<div>Saved (INSERT IGNORE used).</div>";

    // Redirect to the previous page.
    if ($next >= $minPage) {
        echo "<div>Next: <a href='$nextUrl'>$nextUrl</a></div>";
        redirectTo($nextUrl, $redirectDelaySeconds);
    } else {
        echo "<div><strong>Done.</strong> Reached page $minPage.</div>";
        // To start over after reaching page 1, redirect back to $maxPage here, e.g.:
        // redirectTo("get_news.php?page=$maxPage", 5);
    }

} catch (Exception $e) {
    if (!headers_sent()) {
        // Nothing has been output yet, so a plain-text error page can be sent.
        header('Content-Type: text/plain; charset=utf-8');
        echo "ERROR:\n" . $e->getMessage();
    } else {
        // HTML output has already started: calling header() now would only
        // raise a "headers already sent" warning, and the message (which can
        // contain SQL built from scraped text) must be escaped for HTML.
        echo "<pre>ERROR:\n" . htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8') . "</pre>";
    }
}
