Menu/Menü Content/Inhalt
<?php
/*
 * -----------------------------------------------------------------------
 * | Copyright: copyright (C) 2006 by Markus Donhauser
 * | License:   GPL
 * | Version:   0.5 beta 2
 * -----------------------------------------------------------------------
 */

/**********************************************************************
**
** A class to search text in pdf documents.
** Not pretending to be useful other than that.
** But it can easily be extended to a full featured pdf document
** parser by anyone who chooses so.
**
** Author: Rene Kluwen / Chimit Software
**
** License: Public Domain
** Warranty: None
**
***********************************************************************/

// MOS Intruder Alerts
defined( '_VALID_MOS' ) or die( 'Direct Access to this location is not allowed.' );

/**
 * Line-oriented reader that searches for text in a PDF document.
 *
 * The whole file is loaded into memory and then consumed from the front,
 * one line at a time. Reading is destructive: nextline(), textline(),
 * textfound() and pdf2text() all advance through the same buffer, so a
 * document can only be scanned once per instance.
 *
 * Only literal strings "(...)" are treated as text; hexadecimal strings
 * "<...>" are not decoded. Streams are inflated with gzuncompress() only;
 * streams that cannot be inflated that way are discarded.
 */
class pdf
{
    /** Unread remainder of the document; shrinks as lines are consumed. */
    public $_buffer;

    /** Path of the document as passed to the constructor. */
    public $_filename;

    /**
     * Loads the document into memory.
     *
     * If the file does not exist the message "Error: File not found!" is
     * echoed and the instance is left with an empty buffer, so every read
     * method reports end of document.
     *
     * @param string $file Path of the pdf document.
     */
    public function __construct($file)
    {
        $this->_filename = $file;
        $this->_buffer   = '';

        if (!is_file($this->_filename)) {
            echo "Error: File not found!";
            return;
        }

        // file_get_contents() is binary safe and also copes with empty files.
        $content = file_get_contents($this->_filename);
        if ($content !== false) {
            $this->_buffer = $content;
        }
    }

    /**
     * Legacy (PHP 4 style) constructor name.
     *
     * Kept so that existing code calling pdf() explicitly keeps working;
     * it simply delegates to __construct(). Declared after __construct()
     * on purpose, so the engine never treats it as a second constructor.
     *
     * @param string $file Path of the pdf document.
     */
    public function pdf($file)
    {
        $this->__construct($file);
    }

    /**
     * Returns the next line from the document.
     *
     * CR, LF and CRLF are all accepted as line endings and are not part of
     * the returned line. A last line without a line ending is returned too.
     * When the line ends with the "stream" keyword, the data up to the next
     * "endstream" is inflated and pushed back in front of the buffer, so the
     * following calls return the lines of the inflated stream.
     *
     * @return string|false The line, or false when the buffer is exhausted.
     */
    public function nextline()
    {
        $buffer = (string) $this->_buffer;
        $length = strlen($buffer);
        if ($length === 0) {
            return false;
        }

        // Length of the leading run that contains no end-of-line character.
        $lineLength = strcspn($buffer, "\r\n");
        $line       = (string) substr($buffer, 0, $lineLength);

        // Step over the line ending; "\r\n" counts as a single ending.
        $next = $lineLength;
        if ($next < $length) {
            $isCrLf = $buffer[$next] === "\r"
                && $next + 1 < $length
                && $buffer[$next + 1] === "\n";
            $next += $isCrLf ? 2 : 1;
        }
        $rest = (string) substr($buffer, $next);

        // Only the bare keyword at the end of the line opens a stream; this
        // keeps "endstream" and ordinary words such as "upstream" out.
        if (preg_match('/(?:^|[^A-Za-z])stream\s*$/', $line)) {
            $endpos = strpos($rest, 'endstream');
            if ($endpos !== false) {
                $inflated = $this->_inflate((string) substr($rest, 0, $endpos));
                $rest     = $inflated . (string) substr($rest, $endpos + 9); // 9 = strlen('endstream')
            }
            // Without a matching "endstream" the buffer is left untouched.
        }

        $this->_buffer = $rest;

        return $line;
    }

    /**
     * Returns the text of the next line in the document that contains
     * printable text, so searches can be limited to that portion.
     *
     * All literal strings found on the line are decoded and concatenated.
     *
     * @return string|false The decoded text, or false at end of document.
     */
    public function textline()
    {
        // Iterate instead of recursing: the result is never lost and large
        // documents cannot exhaust the call stack.
        while (($line = $this->nextline()) !== false) {
            $text = $this->_extractText($line);
            if ($text !== false) {
                return $text;
            }
        }

        return false;
    }

    /**
     * Writes every remaining line of the document (streams inflated) to a
     * file named like the document, with its extension replaced by ".txt".
     *
     * @return bool True when the complete content was written.
     */
    public function pdf2text()
    {
        // Strip the extension of the last path segment only, so a dot in a
        // directory name is never mistaken for the start of the extension.
        $filename = preg_replace('/\.[^.\/\\\\]*$/', '', $this->_filename) . '.txt';

        $filecontent = '';
        while (($line = $this->nextline()) !== false) {
            $filecontent .= $line . "\n";
        }

        $fp = fopen($filename, 'wb');
        if ($fp === false) {
            return false;
        }

        $written = fwrite($fp, $filecontent);
        fclose($fp);

        return $written === strlen($filecontent);
    }

    /**
     * Tells whether the rest of the document contains the given text.
     *
     * The comparison is case-insensitive and is done per text line. The
     * buffer is consumed up to and including the line that matched, or
     * completely when nothing matched.
     *
     * @param string $str Text to look for.
     *
     * @return bool True when the text was found.
     */
    public function textfound($str)
    {
        $needle = (string) $str;

        while (($line = $this->textline()) !== false) {
            if (stristr($line, $needle) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Inflates the raw data of a stream.
     *
     * @param string $data Bytes found between "stream" and "endstream".
     *
     * @return string The inflated data, or '' when it cannot be inflated.
     */
    private function _inflate($data)
    {
        if ($data === '' || !function_exists('gzuncompress')) {
            return '';
        }

        // Streams using another filter are expected to fail here; that is a
        // deliberate best effort, hence the suppressed warning.
        $inflated = @gzuncompress($data);

        return ($inflated === false) ? '' : $inflated;
    }

    /**
     * Extracts and decodes all literal strings "(...)" of one line.
     *
     * Escaped and balanced nested parentheses are kept inside the string
     * they belong to.
     *
     * @param string $line One line as returned by nextline().
     *
     * @return string|false Concatenated text, or false when the line holds
     *                      no non-empty literal string.
     */
    private function _extractText($line)
    {
        $pattern = '/\(((?:[^()\\\\]++|\\\\.|(?R))*+)\)/s';
        if (!preg_match_all($pattern, $line, $matches)) {
            return false;
        }

        $text = '';
        foreach ($matches[1] as $raw) {
            // One pass over all escapes, so "\\101" stays a backslash
            // followed by "101" instead of being read as an octal code.
            $text .= preg_replace_callback(
                '/\\\\([0-7]{1,3}|.)/s',
                array($this, '_decodeEscape'),
                $raw
            );
        }

        return ($text === '') ? false : $text;
    }

    /**
     * preg_replace_callback() handler decoding one backslash escape.
     *
     * Used instead of the "/e" pattern modifier, which evaluated the
     * replacement as PHP code and no longer exists in PHP 7 and later.
     *
     * @param array $match Match data; index 1 holds what follows the backslash.
     *
     * @return string The decoded character(s).
     */
    private function _decodeEscape($match)
    {
        $code = $match[1];

        // One to three octal digits: character code, high-order overflow ignored.
        if (strspn($code, '01234567') === strlen($code)) {
            return chr(octdec($code) & 0xFF);
        }

        switch ($code) {
            case 'n':
                return "\n";
            case 'r':
                return "\r";
            case 't':
                return "\t";
            case 'b':
                return "\x08";
            case 'f':
                return "\x0C";
            default:
                // "\(", "\)", "\\" and unknown escapes: the backslash is dropped.
                return $code;
        }
    }
}
?>
Bildergalerie
 
pa234491.jpg 10


Medium

  wiederherstellen