修改ua信息

This commit is contained in:
2012-10-18 18:29:32 +08:00
parent 7b009a981a
commit 545c03edd7

View File

@@ -1,246 +1,246 @@
<?php <?php
/* /*
====================================================================== ======================================================================
lastRSS 0.9.1 lastRSS 0.9.1
Simple yet powerfull PHP class to parse RSS files. Simple yet powerfull PHP class to parse RSS files.
by Vojtech Semecky, webmaster @ webdot . cz by Vojtech Semecky, webmaster @ webdot . cz
Latest version, features, manual and examples: Latest version, features, manual and examples:
http://lastrss.webdot.cz/ http://lastrss.webdot.cz/
---------------------------------------------------------------------- ----------------------------------------------------------------------
LICENSE LICENSE
This program is free software; you can redistribute it and/or This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License (GPL) modify it under the terms of the GNU General Public License (GPL)
as published by the Free Software Foundation; either version 2 as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version. of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
To read the license please visit http://www.gnu.org/copyleft/gpl.html To read the license please visit http://www.gnu.org/copyleft/gpl.html
====================================================================== ======================================================================
*/ */
/**
 * lastRSS
 * Simple yet powerful PHP class to parse RSS files.
 *
 * Configure the public properties below, then call Get($url). The feed is
 * parsed with regular expressions (no XML extension is used) into an
 * associative array, or False is returned when the feed cannot be loaded.
 */
class lastRSS {
    // -------------------------------------------------------------------
    // Public properties
    // -------------------------------------------------------------------
    /** Code page assumed when the feed does not declare an encoding. */
    public $default_cp = 'UTF-8';
    /** CDATA handling: 'nochange' keeps the markers; 'content' and 'strip' remove them. */
    public $CDATA = 'nochange';
    /** Target code page passed to iconv(); an empty string disables conversion. */
    public $cp = '';
    /** Maximum number of items to parse; 0 means no limit. */
    public $items_limit = 0;
    /** When true, tags are stripped and entities decoded in item title/description. */
    public $stripHTML = false;
    /** date() format applied to lastBuildDate / pubDate; empty keeps the feed's text. */
    public $date_format = '';
    /** Directory that holds the cache files; an empty string disables caching. */
    public $cache_dir = '';
    /** Maximum age (seconds) of a cache file before the feed is fetched again. */
    public $cache_time = 0;
    /** Source code page of the feed being parsed; set by Parse(), read by my_preg_match(). */
    public $rsscp = '';

    // -------------------------------------------------------------------
    // Private variables
    // -------------------------------------------------------------------
    public $channeltags = array ('title', 'link', 'description', 'language', 'copyright', 'managingEditor', 'webMaster', 'lastBuildDate', 'rating', 'docs');
    public $itemtags = array('title', 'link', 'description', 'author', 'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
    public $imagetags = array('title', 'url', 'link', 'width', 'height');
    public $textinputtags = array('title', 'description', 'name', 'link');

    // -------------------------------------------------------------------
    // Parse RSS file and return an associative array (False on failure).
    // On success the array carries 'cached' => 1 when it was served from
    // the cache file and 'cached' => 0 when it was freshly parsed.
    // -------------------------------------------------------------------
    public function Get ($rss_url) {
        // CACHE DISABLED >> load and parse the file directly
        if ($this->cache_dir == '') {
            $result = $this->Parse($rss_url);
            if ($result) $result['cached'] = 0;
            return $result;
        }

        // CACHE ENABLED: one cache file per URL
        $cache_file = $this->cache_dir . '/rsscache_' . md5($rss_url);
        $mtime = is_file($cache_file) ? @filemtime($cache_file) : false;

        if ($mtime !== false && (time() - $mtime) < $this->cache_time) {
            // Cached file is fresh enough: return the cached array.
            // NOTE(review): unserialize() is only safe while cache_dir is
            // not writable by untrusted parties.
            $raw = @file_get_contents($cache_file);
            $result = ($raw === false) ? false : unserialize($raw);
            // Set 'cached' to 1 only if the cached file is correct
            if (is_array($result)) $result['cached'] = 1;
            return $result;
        }

        // Cached file is missing or too old: parse again and store the
        // outcome (a failed parse is stored too, as before). LOCK_EX keeps
        // a concurrent reader from seeing a half-written file.
        $result = $this->Parse($rss_url);
        @file_put_contents($cache_file, serialize($result), LOCK_EX);
        if ($result) $result['cached'] = 0;
        return $result;
    }

    // -------------------------------------------------------------------
    // Modification of preg_match(); returns the trimmed field with index 1
    // from the 'classic' preg_match() array output, or '' when the pattern
    // does not match. Applies the CDATA and code page settings.
    // -------------------------------------------------------------------
    public function my_preg_match ($pattern, $subject) {
        // No match (0) and regex error (false) both yield the empty string
        if (!preg_match($pattern, (string) $subject, $out) || !isset($out[1])) {
            return '';
        }
        $value = $out[1];

        // Process CDATA (if requested).
        // NOTE(review): 'content' and 'strip' were identical in the original
        // code (both drop only the CDATA markers); that behaviour is kept.
        if ($this->CDATA == 'content' || $this->CDATA == 'strip') {
            $value = strtr($value, array('<![CDATA['=>'', ']]>'=>''));
        }

        // If a code page is set, convert the character encoding to it
        if ($this->cp != '') {
            // rsscp is still empty while Parse() detects the encoding
            $from = ($this->rsscp != '') ? $this->rsscp : $this->default_cp;
            $converted = iconv($from, $this->cp.'//TRANSLIT', $value);
            // A failed conversion keeps the original text instead of
            // silently turning the field into an empty string.
            if ($converted !== false) $value = $converted;
        }

        return trim($value);
    }

    // -------------------------------------------------------------------
    // Replace HTML entities &something; by real characters
    // -------------------------------------------------------------------
    public function unhtmlentities ($string) {
        // Entities table, flipped so that entities are the keys
        $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_QUOTES));
        // Add support for &apos; entity (missing in HTML_ENTITIES)
        $trans_tbl += array('&apos;' => "'");
        // Replace entities by values
        return strtr($string, $trans_tbl);
    }

    // -------------------------------------------------------------------
    // Get remote file or open url.
    // Returns the content as a string, or False when it cannot be read.
    // -------------------------------------------------------------------
    public function getRemoteFile ($url){
        $context = stream_context_create(array(
            'http' => array(
                'timeout' => 3
            )
        ));

        // ini_get() reports the effective setting; get_cfg_var() only sees
        // what is literally written in php.ini.
        if (filter_var(ini_get('allow_url_fopen'), FILTER_VALIDATE_BOOLEAN)) {
            // Failure is reported through the return value, so the warning
            // is suppressed like the cache fopen() always was.
            $f = @fopen($url, 'r', false, $context);
            if ($f === false) {
                return false;
            }
            $results = stream_get_contents($f);
            fclose($f);
        } else if (extension_loaded('curl')) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_TIMEOUT, 5);
            curl_setopt($ch, CURLOPT_USERAGENT, 'PHP Curl/xiaosong.org 1.0 BETA');
            $results = curl_exec($ch);
            // curl_close() has no effect on PHP 8, where the handle is an object
            if (PHP_VERSION_ID < 80000) {
                curl_close($ch);
            }
        } else {
            $results = @file_get_contents($url, false, $context);
        }
        return $results;
    }

    // -------------------------------------------------------------------
    // Parse() is private method used by Get() to load and parse RSS file.
    // Don't use Parse() in your scripts - use Get($rss_file) instead.
    // Returns the result array, or False if the file cannot be loaded.
    // -------------------------------------------------------------------
    public function Parse ($rss_url) {
        // Open and load RSS file
        $rss_content = $this->getRemoteFile($rss_url);
        if (empty($rss_content)) {
            // Error in opening
            return false;
        }

        $result = array();

        // Parse document encoding; fall back to the default code page.
        // rsscp is used in my_preg_match().
        $result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content);
        $this->rsscp = ($result['encoding'] != '') ? $result['encoding'] : $this->default_cp;

        // Parse CHANNEL info (an input without <channel> yields no channel tags)
        $channel = preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel) ? $out_channel[1] : '';
        foreach ($this->channeltags as $channeltag) {
            $temp = $this->my_preg_match("'<$channeltag.*?>(.*?)</$channeltag>'si", $channel);
            if ($temp != '') $result[$channeltag] = $temp; // Set only if not empty
        }

        // If date_format is specified and lastBuildDate is valid,
        // convert lastBuildDate to the specified date format.
        // strtotime() reports failure with false, not -1.
        if ($this->date_format != '' && isset($result['lastBuildDate'])) {
            $timestamp = strtotime($result['lastBuildDate']);
            if ($timestamp !== false) {
                $result['lastBuildDate'] = date($this->date_format, $timestamp);
            }
        }

        // Parse TEXTINPUT info
        // This a little strange regexp means:
        // Look for tag <textinput> with or without any attributes, but skip
        // truncated version <textinput /> (it's not beginning tag)
        preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo);
        if (isset($out_textinfo[2])) {
            foreach ($this->textinputtags as $textinputtag) {
                $temp = $this->my_preg_match("'<$textinputtag.*?>(.*?)</$textinputtag>'si", $out_textinfo[2]);
                if ($temp != '') $result['textinput_'.$textinputtag] = $temp; // Set only if not empty
            }
        }

        // Parse IMAGE info
        preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo);
        if (isset($out_imageinfo[1])) {
            foreach ($this->imagetags as $imagetag) {
                $temp = $this->my_preg_match("'<$imagetag.*?>(.*?)</$imagetag>'si", $out_imageinfo[1]);
                if ($temp != '') $result['image_'.$imagetag] = $temp; // Set only if not empty
            }
        }

        // Parse ITEMS
        preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
        $rss_items = $items[2];
        $i = 0;
        $result['items'] = array(); // create array even if there are no items
        foreach ($rss_items as $rss_item) {
            // Stop as soon as the limit is reached (0 = unlimited)
            if ($this->items_limit != 0 && $i >= $this->items_limit) {
                break;
            }

            foreach ($this->itemtags as $itemtag) {
                $temp = $this->my_preg_match("'<$itemtag.*?>(.*?)</$itemtag>'si", $rss_item);
                if ($temp != '') $result['items'][$i][$itemtag] = $temp; // Set only if not empty
            }

            if ($this->stripHTML) {
                // Strip HTML tags and entities from DESCRIPTION and TITLE,
                // but only for fields the item actually has
                foreach (array('description', 'title') as $field) {
                    if (!empty($result['items'][$i][$field])) {
                        $result['items'][$i][$field] = strip_tags($this->unhtmlentities(strip_tags($result['items'][$i][$field])));
                    }
                }
            }

            // If date_format is specified and pubDate is valid,
            // convert pubDate to the specified date format
            if ($this->date_format != '' && isset($result['items'][$i]['pubDate'])) {
                $timestamp = strtotime($result['items'][$i]['pubDate']);
                if ($timestamp !== false) {
                    $result['items'][$i]['pubDate'] = date($this->date_format, $timestamp);
                }
            }

            // Item counter
            $i++;
        }

        $result['items_count'] = $i;
        return $result;
    }
}
?> ?>