a
     
 
       
   Développeur web    
       
   PHP, MySQL et divers    
       
   Shitao    
   Sénèque    
   Recettes    
       


> Exemple de script pour parser un mot-clé de Google News avec un hébergement mutualisé chez OVH


/**
 * Replaces Windows-1252 specific bytes (0x80-0x9F range) and a set of
 * typographic HTML entities with plain ASCII approximations.
 *
 * All substitutions are applied in a single strtr() pass, so the output of
 * one replacement is never re-scanned by another.
 *
 * @param string $str Text to clean.
 * @return string The text with every mapped byte/entity replaced.
 */
function purge_iso88591($str)
{
    // Raw Windows-1252 bytes that have no ISO-8859-1 equivalent.
    $octets = array(
        "\x80" => "euro",   // euro sign
        "\x82" => ",",      // single low-9 quotation mark
        "\x83" => "f",      // latin small letter f with hook
        "\x84" => "'",      // double low-9 quotation mark
        "\x85" => "...",    // horizontal ellipsis
        "\x86" => "T",      // dagger
        "\x87" => "T",      // double dagger
        "\x88" => "^",      // modifier letter circumflex accent
        "\x89" => "p1000",  // per mille sign
        "\x8a" => "S",      // latin capital letter S with caron
        "\x8b" => "<",      // single left-pointing angle quotation
        "\x8c" => "OE",     // latin capital ligature OE
        "\x8e" => "Z",      // latin capital letter Z with caron
        "\x91" => "'",      // left single quotation mark
        "\x92" => "'",      // right single quotation mark
        "\x93" => '"',      // left double quotation mark
        "\x94" => '"',      // right double quotation mark
        "\x95" => ".",      // bullet
        "\x96" => "-",      // en dash
        "\x97" => "-",      // em dash
        "\x98" => "",       // small tilde (dropped)
        "\x99" => "(tm)",   // trade mark sign
        "\x9a" => "s",      // latin small letter s with caron
        "\x9b" => ">",      // single right-pointing angle quotation
        "\x9c" => "oe",     // latin small ligature oe
        "\x9e" => "z",      // latin small letter z with caron
        "\x9f" => "Y",      // latin capital letter Y with diaeresis
    );

    // Named and numeric HTML entities mapped to the same approximations.
    $entites = array(
        "&euro;"   => "euro",
        "&#8364;"  => "euro",
        "&fnof;"   => "f",
        "&hellip;" => "...",
        "&dagger;" => "T",
        "&Dagger;" => "T",
        "&circ;"   => "^",
        "&permil;" => "p1000",
        "&Scaron;" => "S",
        "&#352;"   => "S",
        "&OElig;"  => "OE",
        "&#338;"   => "OE",
        "&#381;"   => "Z",
        "&Zcaron;" => "Z",
        "&lsquo;"  => "'",
        "&rsquo;"  => "'",
        "&ldquo;"  => '"',
        "&rdquo;"  => '"',
        "&bull;"   => ".",
        "&ndash;"  => "-",
        "&mdash;"  => "-",
        "&oline;"  => "",
        "&trade;"  => "(tm)",
        "&scaron;" => "s",
        "&#353;"   => "s",
        "&oelig;"  => "oe",
        "&#339;"   => "oe",
        "&zcaron;" => "z",
        "&#382;"   => "z",
        "&Yuml;"   => "Y",
    );

    // The two tables share no key, so the union is the complete map.
    $table = $octets + $entites;

    return strtr($str, $table);
}

 

/**
 * Tells whether a string contains at least one well-formed multi-byte
 * UTF-8 sequence (2, 3 or 4 bytes). Pure ASCII text therefore yields 0.
 *
 * @param string $string Bytes to inspect.
 * @return int|false The preg_match() result: 1 when a sequence is found,
 *                   0 when none is, false if the match itself fails.
 */
function detectUTF8($string)
{
    // Extended-mode pattern: one alternative per legal lead-byte range.
    $sequenceMultiOctet = '%(?:
        [\xC2-\xDF][\x80-\xBF]        # non-overlong 2-byte
        |\xE0[\xA0-\xBF][\x80-\xBF]               # excluding overlongs
        |[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}      # straight 3-byte
        |\xED[\x80-\x9F][\x80-\xBF]               # excluding surrogates
        |\xF0[\x90-\xBF][\x80-\xBF]{2}    # planes 1-3
        |[\xF1-\xF3][\x80-\xBF]{3}                  # planes 4-15
        |\xF4[\x80-\x8F][\x80-\xBF]{2}    # plane 16
        )+%xs';

    return preg_match($sequenceMultiOctet, $string);
}

 


/**
 * Normalises a feed text fragment to ISO-8859-1 with ASCII approximations
 * for Windows-1252 punctuation and typographic HTML entities.
 *
 * Steps: strip CDATA delimiters, convert from UTF-8 to ISO-8859-1 when the
 * input actually contains UTF-8 sequences, then run purge_iso88591().
 *
 * The encoding is detected on the ORIGINAL bytes, before any conversion.
 * Converting first (as was done previously) made iconv() return false for
 * input that was already ISO-8859-1, which turned the whole text into an
 * empty string, and could decode already-converted text a second time.
 *
 * @param string $intext Raw text taken from the feed.
 * @return string Cleaned text.
 */
function cleanText($intext) {

    // Remove CDATA wrappers left in the raw text.
    $intext = str_replace(array('<![CDATA[', ']]>'), '', $intext);

    // No multi-byte UTF-8 sequence: treat the bytes as single-byte text already.
    if (!detectUTF8($intext)) {
        return purge_iso88591($intext);
    }

    // Prefer iconv for its transliteration of unmappable characters.
    $converti = false;
    if (function_exists('iconv')) {
        $converti = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $intext);
    }

    // iconv is missing or rejected the input: fall back to a lossy decode
    // rather than losing the whole text.
    if ($converti === false) {
        if (function_exists('mb_convert_encoding')) {
            $converti = mb_convert_encoding($intext, 'ISO-8859-1', 'UTF-8');
        } else {
            $converti = utf8_decode($intext);
        }
    }

    return purge_iso88591($converti);
}

 

/**
 * Turns an HTML news description into plain text with CRLF line breaks.
 *
 * Runs of spaces/tabs are collapsed, closing paragraph tags and <br>
 * variants become "\r\n", repeated line breaks are reduced, and any
 * remaining markup is removed with strip_tags().
 *
 * The POSIX ereg_replace()/eregi_replace() calls used previously were
 * removed from PHP in 7.0; they are replaced by PCRE and plain string
 * functions with the same matching semantics (all former patterns except
 * the [[:blank:]] one were literal strings).
 *
 * @param string $autoblog_news_description HTML fragment.
 * @return string Plain-text description.
 */
function cleanForAutoblog($autoblog_news_description)
{
    // Collapse every run of spaces and tabs into a single space.
    $texte = preg_replace('/[[:blank:]]+/', ' ', $autoblog_news_description);
    if ($texte === null) {
        // PCRE failure: continue with the untouched input instead of null.
        $texte = $autoblog_news_description;
    }

    // Block/line-break tags become CRLF (case-insensitive, applied in this order).
    $texte = str_ireplace(array('</p>', '<br>', '<br/>', '<br />'), "\r\n", $texte);

    // Drop the space that follows a line break.
    $texte = str_replace("\r\n ", "\r\n", $texte);
    $texte = str_replace("\r ", "\r\n", $texte);

    // Reduce repeated breaks. Three passes each, exactly as before, so the
    // output is unchanged for existing content.
    for ($passe = 0; $passe < 3; $passe++) {
        $texte = str_replace("\r\r", "\r", $texte);
    }
    for ($passe = 0; $passe < 3; $passe++) {
        $texte = str_replace("\r\n\r\n", "\r\n", $texte);
    }

    return strip_tags($texte);
}

 

 

// Request parameters for the news search; they are URL-encoded further down
// with http_build_query() and sent as the request body.
$donnees = array();
$donnees['hl']     = 'fr';
$donnees['gl']     = 'fr';
$donnees['q']      = 'toulouse';   // search keyword
$donnees['um']     = '1';
$donnees['ie']     = 'UTF-8';
$donnees['output'] = 'rss';


// code à ajouter si mutu chez ovh (file_get_contents passe mais pas avec des variables en get, il faut forcer en post si les variables sont importantes)
/**
 * Serialises an associative array into a raw HTTP header block.
 *
 * Each entry becomes "Name: value" terminated by CRLF; an empty array
 * gives an empty string.
 *
 * @param array $headers Header names mapped to their values.
 * @return string Concatenated header lines.
 */
function http_build_headers( $headers ) {
    $lignes = array();

    foreach ($headers as $cle => $contenu) {
        $lignes[] = "{$cle}: {$contenu}\r\n";
    }

    return implode('', $lignes);
}

// URL-encode the search parameters; they travel in the POST body.
$contenu = http_build_query($donnees);

// Headers describing that form-encoded body.
$headers = http_build_headers(array(
    'Content-Type'   => 'application/x-www-form-urlencoded',
    'Content-Length' => strlen($contenu),
));

// HTTP stream-context options, assembled one entry at a time.
$options = array('http' => array());
$options['http']['user_agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) Gecko/20061010 Firefox/2.0';
$options['http']['method']     = 'POST';
$options['http']['content']    = $contenu;
$options['http']['header']     = $headers;

// Fetch the feed through that context; the result is the raw response body
// (or false when the request fails).
$contexte = stream_context_create($options);
$autoblog_rss_string = file_get_contents('http://news.google.fr/news', false, $contexte);
// fin du code à ajouter

// si pas utile faire directement sans la partie précédente:
//$RSS2Parser = simplexml_load_file("$autoblog_rss_url");

 

// Parse the downloaded feed. A failed download (false / empty string) or
// malformed XML leaves $RSS2Parser at false and the item loop is skipped.
$RSS2Parser = false;
if (is_string($autoblog_rss_string) && $autoblog_rss_string !== '') {
    $RSS2Parser = simplexml_load_string($autoblog_rss_string);
}

// Accumulators: created here unless an including script already set them.
if (!isset($liste)) {
    $liste = '';
}
if (!isset($nbNews)) {
    $nbNews = 0;
}

if ($RSS2Parser !== false && isset($RSS2Parser->channel)) {
    $racine = $RSS2Parser->channel;

    // One pass per <item> of the channel.
    foreach ($racine->item as $element) {
        $news_title = cleanText((string)$element->title);
        $news_uri = cleanText((string)$element->link);

        $autoblog_news_type = 0;

        // Google News links wrap the real article address in a "url" query
        // parameter. (stripos replaces eregi(), removed in PHP 7.)
        if (stripos($news_uri, 'news.google.com') !== false) {
            // Reset for every item so a value found for a previous item can
            // never leak into this one.
            $urlFromGoogle = null;

            $query = parse_url($news_uri, PHP_URL_QUERY);
            if (is_string($query)) {
                foreach (explode('&', $query) as $paire) {
                    // Limit of 2 keeps any "=" that belongs to the value.
                    $morceaux = explode('=', $paire, 2);
                    if ($morceaux[0] === 'url' && isset($morceaux[1])) {
                        $urlFromGoogle = urldecode($morceaux[1]);
                    }
                }
            }

            // Keep the original link when no "url" parameter was present.
            if ($urlFromGoogle !== null) {
                $news_uri = $urlFromGoogle;
            }
            $autoblog_news_type = 1;
        }

        $news_desc = cleanText((string)$element->description);
        $news_desc = cleanForAutoblog($news_desc);

        $news_date = date("YmdHis", strtotime(cleanText((string)$element->pubDate)));

        // Feed content is untrusted: escape it for the HTML context. The text
        // was converted to ISO-8859-1 by cleanText(), so the charset is given
        // explicitly; double_encode is off so entities already present in the
        // text are kept as they are.
        $titre_html = htmlspecialchars($news_title, ENT_QUOTES, 'ISO-8859-1', false);
        $uri_html   = htmlspecialchars($news_uri, ENT_QUOTES, 'ISO-8859-1', false);
        $desc_html  = htmlspecialchars($news_desc, ENT_QUOTES, 'ISO-8859-1', false);

        $liste .= "<b>$titre_html</b> $news_date <br>$uri_html <br>$desc_html <br> <br><br><br>";
        $nbNews++;
    }
}

echo $liste;




retour

  Tags : googlenews


> Comment parser Google News en PHP avec un hébergement mutualisé chez OVH et un peu d'imagination
Cylman
  Tags : googlenews


> Comment parser Google News en PHP avec un hébergement mutualisé chez OVH et un peu d'imagination



RSS    |    Mes liens    |    Syndication    |    Plan    |    Sitemap XML    |    Création de site web à Carcassonne