doesn't work on windows 10 #10

jericopulvera · 2017-04-17T15:05:06Z

Hello, I was wondering why I can't scrape the data of an HTML page.

Using a direct URL:

$html = file_get_html('http://www.google.com/');

echo $html;

Output:

Warning: file_get_contents(): stream does not support seeking in C:\path\project\simple_dom_html.php on line 75

Using a file path:

 $html = file_get_html('main/users.html');
     var_dump($html);

Instead of HTML, the output is:
 ["doc":protected]=>                     
 string(1) ">"                           
 ["char":protected]=>                    
 NULL                                    
 ["cursor":protected]=>                  
 int(2)                                  
 ["parent":protected]=>                  
 object(simple_html_dom_node)#2 (9) {    
   ["nodetype"]=>                        
   int(5)                                
   ["tag"]=>                             
   string(4) "root"                      
   ["attr"]=>                            
   array(0) {                            
   }                                     
   ["children"]=>                        
   array(0) {                            
   }                                     
   ["nodes"]=>                           
   array(1) {                            
     [0]=>                               
     object(simple_html_dom_node)#3 (9) {
       ["nodetype"]=>                    
       int(3)                            
       ["tag"]=>                         
       string(4) "text"                  
       ["attr"]=>                        
       array(0) {                        
       }                                 
       ["children"]=>                    
       array(0) {                        
       }                                 
       ["nodes"]=>                       
       array(0) {                        
       }                                 
       ["parent"]=>                      
       *RECURSION*                       
       ["_"]=>                           
       array(1) {                        
         [4]=>                           
         string(1) ">"                   
       }                                 
       ["tag_start"]=>                   
       int(0)                            
       ["dom":"simple_html_dom_node":priv
       *RECURSION*                       
     }                                   
   }                                     
   ["parent"]=>                          
   NULL                                  
   ["_"]=>                               
   array(2) {                            
     [0]=>                               
     int(-1)                             
     [1]=>                               
     int(2)                              
   }                                     
   ["tag_start"]=>                       
   int(0)                                
   ["dom":"simple_html_dom_node":private]
   *RECURSION*                           
 }                                       
 ["noise":protected]=>                   
 array(0) {                              
 }                                       
 ["token_blank":protected]=>             
 string(4) "                             
                                         
 ["token_equal":protected]=>             
 string(4) " =/>"                        
 ["token_slash":protected]=>             
 string(6) " />                          
       "                                 
 ["token_attr":protected]=>              
 string(2) " >"                          
 ["_charset"]=>                          
 string(5) "UTF-8"                       
 ["_target_charset"]=>                   
 string(5) "UTF-8"                       
 ["default_br_text":protected]=>         
 string(2) "                             
                                         
 ["default_span_text"]=>                 
 string(1) " "                           
 ["self_closing_tags":protected]=>       
 array(9) {                              
   ["img"]=>                             
   int(1)                                
   ["br"]=>                              
   int(1)                                
   ["input"]=>                           
   int(1)                                
   ["meta"]=>                            
   int(1)                                
   ["link"]=>                            
   int(1)                                
   ["hr"]=>                              
   int(1)                                
   ["base"]=>                            
   int(1)                                
   ["embed"]=>                           
   int(1)                                
   ["spacer"]=>                          
   int(1)                                
 }                                       
 ["block_tags":protected]=>              
 array(6) {                              
   ["root"]=>                            
   int(1)                                
   ["body"]=>                            
   int(1)                                
   ["form"]=>                            
   int(1)                                
   ["div"]=>                             
   int(1)                                
   ["span"]=>                            
   int(1)                                
   ["table"]=>                           
   int(1)                                
 }                                       
 ["optional_closing_tags":protected]=>   
 array(11) {                             
   ["tr"]=>                              
   array(3) {                            
     ["tr"]=>                            
     int(1)                              
     ["td"]=>                            
     int(1)                              
     ["th"]=>                            
     int(1)                              
   }                                     
   ["th"]=>                              
   array(1) {                            
     ["th"]=>                            
     int(1)                              
   }                                     
   ["td"]=>                              
   array(1) {                            
     ["td"]=>                            
     int(1)                              
   }                                     
   ["li"]=>                              
   array(1) {                            
     ["li"]=>                            
     int(1)                              
   }                                     
   ["dt"]=>                              
   array(2) {                            
     ["dt"]=>                            
     int(1)                              
     ["dd"]=>                            
     int(1)                              
   }                                     
   ["dd"]=>                              
   array(2) {                            
     ["dd"]=>                            
     int(1)                              
     ["dt"]=>                            
     int(1)                              
   }                                     
   ["dl"]=>                              
   array(2) {                            
     ["dd"]=>                            
     int(1)                              
     ["dt"]=>                            
     int(1)                              
   }                                     
   ["p"]=>                               
   array(1) {                            
     ["p"]=>                             
     int(1)                              
   }                                     
   ["nobr"]=>                            
   array(1) {                            
     ["nobr"]=>                          
     int(1)                              
   }                                     
   ["b"]=>                               
   array(1) {                            
     ["b"]=>                             
     int(1)                              
   }                                     
   ["option"]=>                          
   array(1) {                            
     ["option"]=>                        
     int(1)                              
   }                                     
 }

The text was updated successfully, but these errors were encountered:

samacs · 2017-04-17T16:47:36Z

Hi, I don’t maintain this library; I just forked it from sourceforge.net. You would probably be better off using Composer to include this library as a dependency. And for your specific situation, are you sure you are using the right methods to scrape the document or file? It seems like dumping the variable is giving you a representation of the instance. From that, you can read the API of the library (I don’t know where) or the source code itself and see how you can get where you want to. Good luck!

…

-- Saul Martínez

On Apr 17, 2017, 8:05 AM -0700, jericopulvera ***@***.***>, wrote: Hello I was wondering why can't I scrape the data of an html page using using direct url $html = file_get_html('http://www.google.com/'); echo $html; Output: Warning: file_get_contents(): stream does not support seeking in C:\path\project\simple_dom_html.php on line 75 using file path $html = file_get_html('main/users.html'); var_dump($html); Instead of Html the Output is: ["doc":protected]=> string(1) ">" ["char":protected]=> NULL ["cursor":protected]=> int(2) ["parent":protected]=> object(simple_html_dom_node)#2 (9) { ["nodetype"]=> int(5) ["tag"]=> string(4) "root" ["attr"]=> array(0) { } ["children"]=> array(0) { } ["nodes"]=> array(1) { [0]=> object(simple_html_dom_node)#3 (9) { ["nodetype"]=> int(3) ["tag"]=> string(4) "text" ["attr"]=> array(0) { } ["children"]=> array(0) { } ["nodes"]=> array(0) { } ["parent"]=> RECURSION [""]=> array(1) { [4]=> string(1) ">" } ["tag_start"]=> int(0) ["dom":"simple_html_dom_node":priv RECURSION } } ["parent"]=> NULL [""]=> array(2) { [0]=> int(-1) [1]=> int(2) } ["tag_start"]=> int(0) ["dom":"simple_html_dom_node":private] RECURSION } ["noise":protected]=> array(0) { } ["token_blank":protected]=> string(4) " ["token_equal":protected]=> string(4) " =/>" ["token_slash":protected]=> string(6) " /> " ["token_attr":protected]=> string(2) " >" ["_charset"]=> string(5) "UTF-8" ["_target_charset"]=> string(5) "UTF-8" ["default_br_text":protected]=> string(2) " ["default_span_text"]=> string(1) " " ["self_closing_tags":protected]=> array(9) { ["img"]=> int(1) ["br"]=> int(1) ["input"]=> int(1) ["meta"]=> int(1) ["link"]=> int(1) ["hr"]=> int(1) ["base"]=> int(1) ["embed"]=> int(1) ["spacer"]=> int(1) } ["block_tags":protected]=> array(6) { ["root"]=> int(1) ["body"]=> int(1) ["form"]=> int(1) ["div"]=> int(1) ["span"]=> int(1) ["table"]=> int(1) } ["optional_closing_tags":protected]=> array(11) { ["tr"]=> array(3) { ["tr"]=> int(1) ["td"]=> int(1) ["th"]=> int(1) } 
["th"]=> array(1) { ["th"]=> int(1) } ["td"]=> array(1) { ["td"]=> int(1) } ["li"]=> array(1) { ["li"]=> int(1) } ["dt"]=> array(2) { ["dt"]=> int(1) ["dd"]=> int(1) } ["dd"]=> array(2) { ["dd"]=> int(1) ["dt"]=> int(1) } ["dl"]=> array(2) { ["dd"]=> int(1) ["dt"]=> int(1) } ["p"]=> array(1) { ["p"]=> int(1) } ["nobr"]=> array(1) { ["nobr"]=> int(1) } ["b"]=> array(1) { ["b"]=> int(1) } ["option"]=> array(1) { ["option"]=> int(1) } } — You are receiving this because you are subscribed to this thread. Reply to this email directly, view it on GitHub, or mute the thread.

jericopulvera closed this as completed May 7, 2017

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

doesn't work on windows 10 #10

doesn't work on windows 10 #10

jericopulvera commented Apr 17, 2017 •

edited

Loading

samacs commented Apr 17, 2017 via email

doesn't work on windows 10 #10

doesn't work on windows 10 #10

Comments

jericopulvera commented Apr 17, 2017 • edited Loading

samacs commented Apr 17, 2017 via email

jericopulvera commented Apr 17, 2017 •

edited

Loading