I have to get the coordinates of a rectangle in a PDF document, and I have figured out how to get the coordinates for Acrobat Distiller.

  • Thread starter: abhisheksingha
  • Start date
A

abhisheksingha

Guest
// Scans the FlateDecode streams of a PDF for the text held in searthText and reports,
// via MessageBox, the numeric operands found before a following "re" (rectangle) token.
// NOTE(review): this excerpt begins inside a method body. The method header, the
// declaration of pdfReader and the `try` that pairs with the `catch` near the end are
// not visible here, so the trailing closing braces close scopes opened outside this view.
// NOTE(review): no Close()/Dispose() call on pdfReader is visible in this excerpt.
string fileName = @"E:\TextboxPRINT_MS.pdf";
string searthText = @"Paul Chronopoulos";


pdfReader = new PdfReader(fileName);
//for (int page = 1; page <= pdfReader.NumberOfPages; page++)
// The per-page loop header above is commented out, so the brace below opens a bare block.
{
// ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
// string currentPageText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);
//Check for identifier
// if (currentPageText.Contains(searthText))
// The page-text check above is commented out as well; this is another bare block.
{

//Get Stream object
// Walks object numbers 1..XrefSize (inclusive) and inspects each one.
for (int i = 1; i <= pdfReader.XrefSize; i++)
{
//Get object.
PdfObject obj = pdfReader.GetPdfObject(i);
//Check for stream object.
if (obj != null && obj.IsStream())
{
//Get stream object.
PRStream stream = (PRStream)obj;
if (stream != null)
{
//PdfStream stream2 = (PdfStream)obj;
//Check for FlateDecode object.
PdfObject filterObj = stream.Get(PdfName.FILTER);
if (filterObj != null)
{
// Only streams whose /Filter entry equals FlateDecode are processed.
// NOTE(review): presumably a /Filter given as an array would not compare equal here — confirm.
bool flateDecodeObj = filterObj.Equals(PdfName.FLATEDECODE);
if (flateDecodeObj)
{





//Get raw bytes.
// Try the decoded bytes first; on any exception fall back to the raw (undecoded) bytes.
byte[] streamBytes;
try
{
streamBytes = PdfReader.GetStreamBytes(stream);
}
catch (Exception ex)
{
streamBytes = PdfReader.GetStreamBytesRaw(stream);
}

if (streamBytes.Length != 0)
{
// buf collects the text of NUMBER tokens seen once the search text has been matched.
List<string> buf = new List<string>();
string streamData1 = Encoding.ASCII.GetString(streamBytes);
// Debug dump of the stream content; overwritten for every stream processed.
System.IO.File.WriteAllText(@"E:\Table.txt", streamData1);
PRTokeniser tok = new PRTokeniser(new RandomAccessFileOrArray(streamBytes));
if (tok.Length > 0)
{


// identifier accumulates the concatenation of every STRING token read so far.
string identifier = string.Empty;
//string identifier2 = string.Empty;
while (tok.NextToken())
{
//if (tok.TokenType == PRTokeniser.TokType.ENDOFFILE)
//{
// break;
//}
if (tok.TokenType == PRTokeniser.TokType.STRING)
{
string st = tok.StringValue;
identifier = identifier + st;

}

identifier = identifier.Trim();
// NOTE(review): identifier is never reset inside this loop, so this equality holds only
// while the concatenation of ALL strings read so far in the stream equals searthText.
if (searthText== identifier)
{

if (tok.TokenType == PRTokeniser.TokType.NUMBER)
{
buf.Add(tok.StringValue);
//string st = tok.StringValue;
//identifier2 = identifier2 + st + " ";

}
else if (tok.TokenType == PRTokeniser.TokType.OTHER)
{
//Look for a rectangle token
if (tok.StringValue == "re")
{
// Stop tokenising at the first "re" seen after the match.
break;
////Sanity check, make sure we have enough items in the buffer
//if (buf.Count < 4) throw new Exception("Not enough elements in buffer for a rectangle");
////Read and convert the values
//float x = float.Parse(buf[buf.Count - 4]);
//float y = float.Parse(buf[buf.Count - 3]);
//float w = float.Parse(buf[buf.Count - 2]);
//float h = float.Parse(buf[buf.Count - 1]);
////..do something with them here
}
}

}
}

// NOTE(review): only Count > 0 is checked, yet buf[1]..buf[3] are read; with fewer than
// four entries the indexer throws. Also, buf holds every number seen since the match, so
// the first four entries are not necessarily the four operands directly before "re"
// (the commented-out code above reads the LAST four instead).
if (buf.Count > 0)
{
MessageBox.Show("X = " + buf[0].ToString() + "\n" + "Y = " + buf[1].ToString() + "\n" + "Width = " + buf[2].ToString() + "\n" + "Height = " + buf[3].ToString());
return;

}

//try
//{
// byte[] bytes = Convert.FromBase64String(identifier);
// System.Text.UTF8Encoding encoder = new System.Text.UTF8Encoding();
// System.Text.Decoder decoder = encoder.GetDecoder();
// int count = decoder.GetCharCount(bytes, 0, bytes.Length);
// char[] arr = new char[count];
// decoder.GetChars(bytes, 0, bytes.Length, arr, 0);
// identifier = new string(arr);
//}
//catch (Exception exx)
//{
// MessageBox.Show(exx.Message);
//}

identifier = identifier.Trim();

// if (!searthText.Contains(identifier))
// continue;

// Fallback when no numbers were collected: if the accumulated strings are a substring of
// the search text, re-tokenise the stream and show every OTHER token concatenated together.
// NOTE(review): string.Contains("") is true, so a stream with no STRING tokens also enters here.
if (searthText.Contains(identifier))
{
identifier = string.Empty;
PRTokeniser tok2 = new PRTokeniser(new RandomAccessFileOrArray(streamBytes));
while (tok2.NextToken())
{

//if (tok.TokenType == PRTokeniser.TokType.ENDOFFILE)
//{
// break;
//}
if (tok2.TokenType == PRTokeniser.TokType.OTHER)
{
string st = tok2.StringValue;
identifier = identifier + st;

}
}

MessageBox.Show(identifier);
}

}

//

//PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(RASFACTORY.createSource(streamBytes)));
//PdfContentParser ps = new PdfContentParser(tokeniser);
//ArrayList<PdfObject> operands = new ArrayList<PdfObject>();
//while (ps.parse(operands).size() > 0) {
// // PdfLiteral operator = (PdfLiteral) operands.get(operands.size() - 1);
// // processOperator(operator, operands);
//}
// PRTokeniser token = new PRTokeniser(streamBytes);


// Second, text-based approach: treat the stream as ASCII text and look for the operands
// that precede the first occurrence of "re".
//Get string from the raw bytes.
string streamData = Encoding.ASCII.GetString(streamBytes);

// Skip to the next xref object when there is no text to inspect.
if (string.IsNullOrEmpty(streamData))
continue;

// NOTE(review): this dump goes to F:\ while the earlier one goes to E:\, and the same
// write is repeated a few lines below.
System.IO.File.WriteAllText(@"F:\Table.txt", streamData);

// NOTE(review): IsMCID is only referenced by the commented-out check below; despite the
// name it tests for the substring "Tj".
bool IsMCID = streamData.Contains("Tj");

// if (!IsMCID)
// continue;

//For debug
System.IO.File.WriteAllText(@"F:\Table.txt", streamData);
//continue;

//Get content tokens
// NOTE(review): this splits on the substring "re" wherever it occurs in the text, not only
// where it stands alone as an operator.
string[] tokens = streamData.Split(new[] { "re" }, StringSplitOptions.None);

if (tokens.Length == 0)
continue;

//Get first token
string firstToken = tokens[0].Trim();
//Get last occurrence index of the newline within the token.
int pFrom = firstToken.LastIndexOf("\n");
//Get the rect coordinates.
// Takes the text after the last newline (or the whole token when LastIndexOf returns -1).
string rect = firstToken.Substring(pFrom + 1);

// Accept the line only when it splits into exactly four space-separated parts.
string[] rectArray = rect.Split(' ');
if (rectArray.Length == 4)
{
MessageBox.Show("X = " + rectArray[0].ToString() + "\n" + "Y = " + rectArray[1].ToString() + "\n" + "Width = " + rectArray[2].ToString() + "\n" + "Height = " + rectArray[3].ToString());
return;
}
}
}

}

}
}
}
}
}

}
// NOTE(review): the `try` matching this handler is outside the visible excerpt.
// Any exception raised in the guarded code is shown to the user as a message box.
catch (Exception ex)
{
MessageBox.Show(ex.Message);
}
}

}
}

Continue reading...
 
Back
Top