@@ -8,7 +8,7 @@ const winston = require("winston");
88const logger = winston . loggers . get ( "defaultLogger" ) ;
99const { logUserData } = require ( "./../../utils/helper" ) ;
1010const { customFetch } = require ( "../../utils/helper" ) ;
11- const { Readable } = require ( "stream" ) ;
11+ const stream = require ( "stream" ) ;
1212
1313var JSZip = require ( "jszip" ) ;
1414PDLQueue . on ( "active" , ( job , jobPromise ) => {
@@ -68,59 +68,25 @@ async function getZipAndBytelength(no_of_pages, id, title, job) {
6868 return [ zip , byteLength , errorFlag ] ;
6969}
7070
71- async function getPdfAndBytelength ( pdfUrl , job ) {
72- try {
73- let errorFlag = { status : false , page : "" } ;
74- const response = await customFetch (
75- pdfUrl ,
76- "GET" ,
77- new Headers ( {
78- "Content-Type" : "application/pdf" ,
79- } ) ,
80- "file"
81- ) ;
82- if ( response . status === 200 ) {
83- job . progress ( 30 ) ;
84- const buffer = await response . buffer ( ) ;
85- job . progress ( 60 ) ;
86- return {
87- pdfBuffer : buffer ,
88- byteLength : buffer . byteLength ,
89- errorFlag,
90- } ;
91- } else {
92- logger . log ( {
93- level : "error" ,
94- message : `Failure PDL: Failed to download PDF. Status Code: ${ response . status } ` ,
95- } ) ;
96- errorFlag = { status : true , page : pdfUrl } ;
97- return {
98- pdfBuffer : null ,
99- byteLength : null ,
100- errorFlag,
101- } ;
102- }
103- } catch ( error ) {
104- logger . log ( {
105- level : "error" ,
106- message : `Failure PDL: ${ error } ` ,
107- } ) ;
108- let errorFlag = { status : true , page : pdfUrl } ;
109- return {
110- pdfBuffer : null ,
111- byteLength : null ,
112- errorFlag,
113- } ;
114- }
115- }
116-
117- function setHeaders ( metadata , byteLength , title , contentType ) {
71+ function setHeaders ( metadata , contentLength , title , contentType ) {
11872 let headers = { } ;
73+ const restrictedHeaders = [
74+ "trueuri" ,
75+ "isemailnotification" ,
76+ "iaidentifier" ,
77+ "contenttype" ,
78+ "pdfurl" ,
79+ ] ;
11980 headers [
12081 "Authorization"
12182 ] = `LOW ${ process . env . access_key } :${ process . env . secret_key } ` ;
122- headers [ "Content-type" ] = `application/${ contentType } ` ;
123- headers [ "Content-length" ] = byteLength ;
83+ if ( contentType === "pdf" ) {
84+ headers [ "Content-type" ] = `application/${ contentType } ; charset=utf-8` ;
85+ headers [ "Accept-Charset" ] = "utf-8" ;
86+ } else {
87+ headers [ "Content-type" ] = `application/${ contentType } ` ;
88+ }
89+ headers [ "Content-length" ] = contentLength ;
12490 headers [ "X-Amz-Auto-Make-Bucket" ] = 1 ;
12591 headers [ "X-Archive-meta-collection" ] = "opensource" ;
12692 headers [ "X-Archive-Ignore-Preexisting-Bucket" ] = 1 ;
@@ -134,7 +100,8 @@ function setHeaders(metadata, byteLength, title, contentType) {
134100 ] = `urn:pdl:${ metadata [ "bookID" ] } :${ metadata [ "categoryID" ] } ` ; //To be added
135101 for ( var key in metadata ) {
136102 let meta_key = key . trim ( ) . replace ( / / g, "-" ) . toLowerCase ( ) ;
137- headers [ `X-archive-meta-${ meta_key } ` ] = metadata [ key ] ;
103+ if ( ! _ . includes ( restrictedHeaders , meta_key ) )
104+ headers [ `X-archive-meta-${ meta_key } ` ] = metadata [ key ] ;
138105 }
139106 headers [ "X-archive-meta-title" ] = metadata [ "title" ] ;
140107 headers [ `X-archive-meta-description` ] = `uri(${ encodeURI (
@@ -192,44 +159,71 @@ async function uploadZipToIA(
192159 ) ;
193160}
194161
195- async function uploadPdfToIA (
196- pdfBuffer ,
197- metadata ,
198- byteLength ,
199- email ,
200- job ,
201- onError ,
202- trueURI
203- ) {
162+ function uploadPdfToIA ( pdfUrl , job , metadata , trueURI , done ) {
163+ const getPdf = request ( pdfUrl ) ;
164+ let bufferLength = 0 ;
165+ const chunks = [ ] ;
204166 const bucketTitle = metadata . IAIdentifier ;
205167 const IAuri = `http://s3.us.archive.org/${ bucketTitle } /${ bucketTitle } .pdf` ;
206- let headers = setHeaders (
207- metadata ,
208- byteLength ,
209- metadata . title ,
210- job . data . details . contentType
211- ) ;
212- const options = {
213- method : "PUT" ,
214- uri : IAuri ,
215- headers : headers ,
216- } ;
217- const readableStream = Readable . from ( pdfBuffer ) ;
218- readableStream . pipe (
219- request ( options , ( error , response , body ) => {
220- if ( response . statusCode === 200 ) {
221- EmailProducer ( metadata . userName , metadata . title , trueURI , true ) ;
222- onError ( false , null ) ;
223- } else {
224- logger . log ( {
225- level : "error" ,
226- message : `IA Failure PDL ${ body || error } ` ,
227- } ) ;
228- EmailProducer ( metadata . userName , metadata . title , trueURI , false ) ;
229- onError ( true , body || error ) ;
230- }
231- } )
232- ) ;
168+ getPdf . on ( "response" , function ( data ) {
169+ if ( data . statusCode !== 200 ) {
170+ logger . log ( {
171+ level : "error" ,
172+ message : `Failure PDL: Failed to download PDF. Status Code: ${ data . statusCode } ` ,
173+ } ) ;
174+ done ( new Error ( "Failed to download PDF." ) ) ;
175+ } else {
176+ job . progress ( 20 ) ;
177+ }
178+ } ) ;
179+
180+ getPdf . on ( "end" , function ( ) {
181+ const newBuffer = Buffer . concat ( chunks ) ;
182+ var bufferStream = new stream . PassThrough ( ) ;
183+ bufferStream . end ( newBuffer ) ;
184+ job . progress ( 80 ) ;
185+ let headers = setHeaders (
186+ metadata ,
187+ bufferLength ,
188+ metadata . title ,
189+ job . data . details . contentType
190+ ) ;
191+ bufferStream . pipe (
192+ request (
193+ {
194+ method : "PUT" ,
195+ preambleCRLF : true ,
196+ postambleCRLF : true ,
197+ uri : IAuri ,
198+ headers,
199+ } ,
200+ async ( error , response , body ) => {
201+ if ( error || response . statusCode != 200 ) {
202+ const errorMessage = ! body ? error : body ;
203+ logger . log ( {
204+ level : "error" ,
205+ message : `IA Failure PDL ${ errorMessage } ` ,
206+ } ) ;
207+ if ( metadata . isEmailNotification === "true" ) {
208+ EmailProducer ( job . data . userName , metadata . title , trueURI , false ) ;
209+ }
210+ done ( new Error ( errorMessage ) ) ;
211+ } else {
212+ job . progress ( 100 ) ;
213+ if ( metadata . isEmailNotification === "true" ) {
214+ EmailProducer ( job . data . userName , metadata . title , trueURI , true ) ;
215+ }
216+ done ( null , true ) ;
217+ }
218+ }
219+ )
220+ ) ;
221+ } ) ;
222+
223+ getPdf . on ( "data" , function ( chunk ) {
224+ bufferLength += chunk . length ;
225+ chunks . push ( chunk ) ;
226+ } ) ;
233227}
234228
235229PDLQueue . process ( async ( job , done ) => {
@@ -242,32 +236,12 @@ PDLQueue.process(async (job, done) => {
242236 logUserData ( jobLogs [ "userName" ] , "Panjab Digital Library" ) ;
243237
244238 if ( job . data . details . pdfUrl ) {
245- const { pdfBuffer , byteLength , errorFlag } = await getPdfAndBytelength (
239+ uploadPdfToIA (
246240 job . data . details . pdfUrl ,
247- job
248- ) ;
249- if ( errorFlag . status ) {
250- logger . log ( {
251- level : "error" ,
252- message : `Failure PDL: Failed to download ${ errorFlag . page } ` ,
253- } ) ;
254- done ( new Error ( `Failure PDL: Failed to download ${ errorFlag . page } ` ) ) ;
255- }
256- await uploadPdfToIA (
257- pdfBuffer ,
258- job . data . details ,
259- byteLength ,
260- job . data . details . email ,
261241 job ,
262- ( isError , error ) => {
263- if ( isError ) {
264- done ( new Error ( error ) ) ;
265- } else {
266- job . progress ( 100 ) ;
267- done ( null , true ) ;
268- }
269- } ,
270- trueURI
242+ job . data . details ,
243+ trueURI ,
244+ done
271245 ) ;
272246 } else {
273247 const [ zip , byteLength , errorFlag ] = await getZipAndBytelength (
0 commit comments